version 1.1, 2005/09/20 17:32:06
|
version 1.7, 2006/12/14 15:50:05
|
Line 3
|
Line 3
|
use strict; |
use strict; |
use XML::LibXML; |
use XML::LibXML; |
|
|
use lib '/usr/local/mpiwg/archive_devel'; |
use lib '/usr/local/mpiwg/archive'; |
use MPIWGStor; |
use MPIWGStor; |
|
|
# make output unbuffered |
# make output unbuffered |
$|=1; |
$|=1; |
|
|
# program version |
# program version |
my $version = "0.2 (19.9.2005 ROC)"; |
my $version = "0.2.5 (14.12.2006 ROC)"; |
my $help = |
my $help = |
"use: makemeta-vlp [options] file.xml |
"use: makemeta-vlp [options] file.xml |
options: |
options: |
Line 19 options:
|
Line 19 options:
|
-replace replace existing index files |
-replace replace existing index files |
-online-mode mode for creating online/permanent files |
-online-mode mode for creating online/permanent files |
-archive-mode mode for creating archive/data files |
-archive-mode mode for creating archive/data files |
|
-access=free adds free access tag for online-mode |
"; |
"; |
logger("INFO", "makemeta-vlp $version"); |
logger("INFO", "makemeta-vlp $version"); |
|
|
Line 27 logger("INFO", "makemeta-vlp $version");
|
Line 28 logger("INFO", "makemeta-vlp $version");
|
|
|
# generic mappings at top level |
# generic mappings at top level |
my %gen_map = ( |
my %gen_map = ( |
'Custom2_Language' => 'meta/lang' |
'Custom2_Language' => 'meta/lang', |
|
'productionComment' => 'meta/image-acquisition/production-comment', |
|
'derivedFrom' => 'derived-from/archive-path' |
); |
); |
# sub type switch tag |
# sub type switch tag |
my %type_map = ( |
my %type_map = ( |
Line 47 my %subtype_map = (
|
Line 50 my %subtype_map = (
|
'NumberOfVolumes' => 'meta/bib/number-of-volumes', |
'NumberOfVolumes' => 'meta/bib/number-of-volumes', |
'Pages' => 'meta/bib/number-of-pages' |
'Pages' => 'meta/bib/number-of-pages' |
}, |
}, |
|
'(Book)' => { |
|
'_name' => 'book', |
|
'Author' => 'meta/bib/author', |
|
'Title' => 'meta/bib/title', |
|
'Year' => 'meta/bib/year', |
|
'Place_Published' => 'meta/bib/city', |
|
'Publisher' => 'meta/bib/publisher', |
|
'Edition' => 'meta/bib/edition', |
|
'Volume' => 'meta/bib/volume', |
|
'NumberOfVolumes' => 'meta/bib/number-of-volumes', |
|
'Pages' => 'meta/bib/number-of-pages', |
|
'#Cover pages only, book sections have been extracted' => 'meta/bib/comment' |
|
}, |
'Book Section' => { |
'Book Section' => { |
'_name' => 'inbook', |
'_name' => 'inbook', |
'Author' => 'meta/bib/author', |
'Author' => 'meta/bib/author', |
'Title' => 'meta/bib/title', |
'Title' => 'meta/bib/title', |
'Year' => 'meta/bib/year', |
'Year' => 'meta/bib/year', |
'Secondary_Title' => 'meta/bib/book-title', |
'SecondaryTitle' => 'meta/bib/book-title', |
'SecondaryAuthor' => 'meta/bib/editor', |
'SecondaryAuthor' => 'meta/bib/editor', |
'Volume' => 'meta/bib/volume', |
'Volume' => 'meta/bib/volume', |
'NumberOfVolumes' => 'meta/bib/number-of-volumes', |
'NumberOfVolumes' => 'meta/bib/number-of-volumes', |
Line 68 my %subtype_map = (
|
Line 84 my %subtype_map = (
|
'Edition' => 'meta/bib/edition', |
'Edition' => 'meta/bib/edition', |
'Volume' => 'meta/bib/volume', |
'Volume' => 'meta/bib/volume', |
'NumberOfVolumes' => 'meta/bib/number-of-volumes', |
'NumberOfVolumes' => 'meta/bib/number-of-volumes', |
|
'Pages' => 'meta/bib/number-of-pages', |
|
'#Cover pages only, book sections have been extracted' => 'meta/bib/comment' |
|
}, |
|
'(Edited Book)' => { |
|
'_name' => 'edited-book', |
|
'Author' => 'meta/bib/editor', |
|
'Title' => 'meta/bib/title', |
|
'Year' => 'meta/bib/year', |
|
'Place_Published' => 'meta/bib/city', |
|
'Publisher' => 'meta/bib/publisher', |
|
'Edition' => 'meta/bib/edition', |
|
'Volume' => 'meta/bib/volume', |
|
'NumberOfVolumes' => 'meta/bib/number-of-volumes', |
'Pages' => 'meta/bib/number-of-pages' |
'Pages' => 'meta/bib/number-of-pages' |
}, |
}, |
'Journal Article' => { |
'Journal Article' => { |
Line 80 my %subtype_map = (
|
Line 109 my %subtype_map = (
|
'Number_Issue' => 'meta/bib/issue', |
'Number_Issue' => 'meta/bib/issue', |
'Pages' => 'meta/bib/pages' |
'Pages' => 'meta/bib/pages' |
}, |
}, |
|
'(JournalVolume)' => { |
|
'_name' => 'journal-volume', |
|
'SecondaryTitle' => 'meta/bib/title', |
|
'SecondaryAuthor' => 'meta/bib/editor', |
|
'Publisher' => 'meta/bib/publisher', |
|
'Place_Published' => 'meta/bib/city', |
|
'Year' => 'meta/bib/year', |
|
'Volume' => 'meta/bib/volume', |
|
'Pages' => 'meta/bib/number-of-pages', |
|
'#Cover pages only, articles have been extracted' => 'meta/bib/comment' |
|
}, |
'Magazine Article' => { |
'Magazine Article' => { |
'_name' => 'magazine-article', |
'_name' => 'magazine-article', |
'Author' => 'meta/bib/author', |
'Author' => 'meta/bib/author', |
Line 182 logger('DEBUG', "online_mode: $online_mo
|
Line 222 logger('DEBUG', "online_mode: $online_mo
|
my $archive_mode = (exists $$args{'archive-mode'}) ? $$args{'archive-mode'} : 0; |
my $archive_mode = (exists $$args{'archive-mode'}) ? $$args{'archive-mode'} : 0; |
logger('DEBUG', "archive_mode: $archive_mode"); |
logger('DEBUG', "archive_mode: $archive_mode"); |
|
|
|
# access type |
|
my $access_type = (exists $$args{'access'}) ? $$args{'access'} : ""; |
|
|
# index.meta namespace (not really implemented!) |
# index.meta namespace (not really implemented!) |
my $namespace = ""; |
my $namespace = ""; |
|
|
Line 229 sub find_arch_dir {
|
Line 272 sub find_arch_dir {
|
|
|
sub find_permanent_dir { |
sub find_permanent_dir { |
my ($input_node) = @_; |
my ($input_node) = @_; |
my $online_base = '/mpiwg/online/permanent'; |
my $online_base = $lib_online_dir; |
my $dest_id = sstrip($input_node->findvalue("fm:$arch_id_field")); |
my $dest_id = sstrip($input_node->findvalue("fm:$arch_id_field")); |
if (! $dest_id) { |
if (! $dest_id) { |
logger('ERROR', "no ID field for online permanent entry"); |
logger('ERROR', "no ID field for online permanent entry"); |
Line 301 sub convert_bib {
|
Line 344 sub convert_bib {
|
$cnt++; |
$cnt++; |
} |
} |
} |
} |
|
# append additional constant fields (beginning with #) |
|
foreach my $k (keys %{$subtype_map{$type}}) { |
|
if ($k =~ /^\#(.*)/) { |
|
my $val = $1; |
|
create_text_path($subtype_map{$type}->{$k}, $val, $index_root, $namespace); |
|
} |
|
} |
} |
} |
return $cnt; |
return $cnt; |
} |
} |
Line 363 sub process_fm_entry {
|
Line 413 sub process_fm_entry {
|
create_text_path('meta/acquisition/date', stime(time), $index_root, $namespace); |
create_text_path('meta/acquisition/date', stime(time), $index_root, $namespace); |
create_text_path('meta/acquisition/provider/provider-id', 'vlp', $index_root, $namespace); |
create_text_path('meta/acquisition/provider/provider-id', 'vlp', $index_root, $namespace); |
create_text_path('meta/acquisition/provider/address', 'Max Planck Institute for the History of Science', $index_root, $namespace); |
create_text_path('meta/acquisition/provider/address', 'Max Planck Institute for the History of Science', $index_root, $namespace); |
# image acquisition |
|
create_text_path('meta/image-acquisition/device', 'Flatbed Scanner' , $index_root, $namespace); |
|
create_text_path('meta/image-acquisition/image-type', 'Greyscale' , $index_root, $namespace); |
|
create_text_path('meta/image-acquisition/production-comment', 'Raw scans in \'raw\' folder, cleaned pages in \'pages\' folder.' , $index_root, $namespace); |
|
} |
} |
# media |
# media |
create_text_path('media-type', 'image', $index_root, $namespace); |
create_text_path('media-type', 'image', $index_root, $namespace); |
create_text_path('meta/content-type', 'scanned document', $index_root, $namespace); |
create_text_path('meta/content-type', 'scanned document', $index_root, $namespace); |
|
# access |
|
if ($access_type) { |
|
if ($access_type eq "free") { |
|
create_element_path('meta/access-conditions/access@type=free', $index_root, $namespace); |
|
} else { |
|
my $acc_tag = create_element_path('meta/access-conditions/access@type=institution', $index_root, $namespace); |
|
create_text_path('name', $access_type, $acc_tag, $namespace); |
|
} |
|
} |
|
|
# convert bib entries |
# convert bib entries |
my $cnt = convert_bib($input_node, $index_root, $index_doc); |
my $cnt = convert_bib($input_node, $index_root, $index_doc); |