|
|
| version 1.1, 2005/09/20 17:32:06 | version 1.7, 2006/12/14 15:50:05 |
|---|---|
| Line 3 | Line 3 |
| use strict; | use strict; |
| use XML::LibXML; | use XML::LibXML; |
| use lib '/usr/local/mpiwg/archive_devel'; | use lib '/usr/local/mpiwg/archive'; |
| use MPIWGStor; | use MPIWGStor; |
| # make output unbuffered | # make output unbuffered |
| $|=1; | $|=1; |
| # program version | # program version |
| my $version = "0.2 (19.9.2005 ROC)"; | my $version = "0.2.5 (14.12.2006 ROC)"; |
| my $help = | my $help = |
| "use: makemeta-vlp [options] file.xml | "use: makemeta-vlp [options] file.xml |
| options: | options: |
| Line 19 options: | Line 19 options: |
| -replace replace existing index files | -replace replace existing index files |
| -online-mode mode for creating online/permanent files | -online-mode mode for creating online/permanent files |
| -archive-mode mode for creating archive/data files | -archive-mode mode for creating archive/data files |
| | -access=free adds free access tag for online-mode |
| "; | "; |
| logger("INFO", "makemeta-vlp $version"); | logger("INFO", "makemeta-vlp $version"); |
| Line 27 logger("INFO", "makemeta-vlp $version"); | Line 28 logger("INFO", "makemeta-vlp $version"); |
| # generic mappings at top level | # generic mappings at top level |
| my %gen_map = ( | my %gen_map = ( |
| 'Custom2_Language' => 'meta/lang' | 'Custom2_Language' => 'meta/lang', |
| | 'productionComment' => 'meta/image-acquisition/production-comment', |
| | 'derivedFrom' => 'derived-from/archive-path' |
| ); | ); |
| # sub type switch tag | # sub type switch tag |
| my %type_map = ( | my %type_map = ( |
| Line 47 my %subtype_map = ( | Line 50 my %subtype_map = ( |
| 'NumberOfVolumes' => 'meta/bib/number-of-volumes', | 'NumberOfVolumes' => 'meta/bib/number-of-volumes', |
| 'Pages' => 'meta/bib/number-of-pages' | 'Pages' => 'meta/bib/number-of-pages' |
| }, | }, |
| | '(Book)' => { |
| | '_name' => 'book', |
| | 'Author' => 'meta/bib/author', |
| | 'Title' => 'meta/bib/title', |
| | 'Year' => 'meta/bib/year', |
| | 'Place_Published' => 'meta/bib/city', |
| | 'Publisher' => 'meta/bib/publisher', |
| | 'Edition' => 'meta/bib/edition', |
| | 'Volume' => 'meta/bib/volume', |
| | 'NumberOfVolumes' => 'meta/bib/number-of-volumes', |
| | 'Pages' => 'meta/bib/number-of-pages', |
| | '#Cover pages only, book sections have been extracted' => 'meta/bib/comment' |
| | }, |
| 'Book Section' => { | 'Book Section' => { |
| '_name' => 'inbook', | '_name' => 'inbook', |
| 'Author' => 'meta/bib/author', | 'Author' => 'meta/bib/author', |
| 'Title' => 'meta/bib/title', | 'Title' => 'meta/bib/title', |
| 'Year' => 'meta/bib/year', | 'Year' => 'meta/bib/year', |
| 'Secondary_Title' => 'meta/bib/book-title', | 'SecondaryTitle' => 'meta/bib/book-title', |
| 'SecondaryAuthor' => 'meta/bib/editor', | 'SecondaryAuthor' => 'meta/bib/editor', |
| 'Volume' => 'meta/bib/volume', | 'Volume' => 'meta/bib/volume', |
| 'NumberOfVolumes' => 'meta/bib/number-of-volumes', | 'NumberOfVolumes' => 'meta/bib/number-of-volumes', |
| Line 68 my %subtype_map = ( | Line 84 my %subtype_map = ( |
| 'Edition' => 'meta/bib/edition', | 'Edition' => 'meta/bib/edition', |
| 'Volume' => 'meta/bib/volume', | 'Volume' => 'meta/bib/volume', |
| 'NumberOfVolumes' => 'meta/bib/number-of-volumes', | 'NumberOfVolumes' => 'meta/bib/number-of-volumes', |
| | 'Pages' => 'meta/bib/number-of-pages', |
| | '#Cover pages only, book sections have been extracted' => 'meta/bib/comment' |
| | }, |
| | '(Edited Book)' => { |
| | '_name' => 'edited-book', |
| | 'Author' => 'meta/bib/editor', |
| | 'Title' => 'meta/bib/title', |
| | 'Year' => 'meta/bib/year', |
| | 'Place_Published' => 'meta/bib/city', |
| | 'Publisher' => 'meta/bib/publisher', |
| | 'Edition' => 'meta/bib/edition', |
| | 'Volume' => 'meta/bib/volume', |
| | 'NumberOfVolumes' => 'meta/bib/number-of-volumes', |
| 'Pages' => 'meta/bib/number-of-pages' | 'Pages' => 'meta/bib/number-of-pages' |
| }, | }, |
| 'Journal Article' => { | 'Journal Article' => { |
| Line 80 my %subtype_map = ( | Line 109 my %subtype_map = ( |
| 'Number_Issue' => 'meta/bib/issue', | 'Number_Issue' => 'meta/bib/issue', |
| 'Pages' => 'meta/bib/pages' | 'Pages' => 'meta/bib/pages' |
| }, | }, |
| | '(JournalVolume)' => { |
| | '_name' => 'journal-volume', |
| | 'SecondaryTitle' => 'meta/bib/title', |
| | 'SecondaryAuthor' => 'meta/bib/editor', |
| | 'Publisher' => 'meta/bib/publisher', |
| | 'Place_Published' => 'meta/bib/city', |
| | 'Year' => 'meta/bib/year', |
| | 'Volume' => 'meta/bib/volume', |
| | 'Pages' => 'meta/bib/number-of-pages', |
| | '#Cover pages only, articles have been extracted' => 'meta/bib/comment' |
| | }, |
| 'Magazine Article' => { | 'Magazine Article' => { |
| '_name' => 'magazine-article', | '_name' => 'magazine-article', |
| 'Author' => 'meta/bib/author', | 'Author' => 'meta/bib/author', |
| Line 182 logger('DEBUG', "online_mode: $online_mo | Line 222 logger('DEBUG', "online_mode: $online_mo |
| my $archive_mode = (exists $$args{'archive-mode'}) ? $$args{'archive-mode'} : 0; | my $archive_mode = (exists $$args{'archive-mode'}) ? $$args{'archive-mode'} : 0; |
| logger('DEBUG', "archive_mode: $archive_mode"); | logger('DEBUG', "archive_mode: $archive_mode"); |
| | # access type |
| | my $access_type = (exists $$args{'access'}) ? $$args{'access'} : ""; |
| # index.meta namespace (not really implemented!) | # index.meta namespace (not really implemented!) |
| my $namespace = ""; | my $namespace = ""; |
| Line 229 sub find_arch_dir { | Line 272 sub find_arch_dir { |
| sub find_permanent_dir { | sub find_permanent_dir { |
| my ($input_node) = @_; | my ($input_node) = @_; |
| my $online_base = '/mpiwg/online/permanent'; | my $online_base = $lib_online_dir; |
| my $dest_id = sstrip($input_node->findvalue("fm:$arch_id_field")); | my $dest_id = sstrip($input_node->findvalue("fm:$arch_id_field")); |
| if (! $dest_id) { | if (! $dest_id) { |
| logger('ERROR', "no ID field for online permanent entry"); | logger('ERROR', "no ID field for online permanent entry"); |
| Line 301 sub convert_bib { | Line 344 sub convert_bib { |
| $cnt++; | $cnt++; |
| } | } |
| } | } |
| | # append additional constant fields (beginning with #) |
| | foreach my $k (keys %{$subtype_map{$type}}) { |
| | if ($k =~ /^\#(.*)/) { |
| | my $val = $1; |
| | create_text_path($subtype_map{$type}->{$k}, $val, $index_root, $namespace); |
| | } |
| | } |
| } | } |
| return $cnt; | return $cnt; |
| } | } |
| Line 363 sub process_fm_entry { | Line 413 sub process_fm_entry { |
| create_text_path('meta/acquisition/date', stime(time), $index_root, $namespace); | create_text_path('meta/acquisition/date', stime(time), $index_root, $namespace); |
| create_text_path('meta/acquisition/provider/provider-id', 'vlp', $index_root, $namespace); | create_text_path('meta/acquisition/provider/provider-id', 'vlp', $index_root, $namespace); |
| create_text_path('meta/acquisition/provider/address', 'Max Planck Institute for the History of Science', $index_root, $namespace); | create_text_path('meta/acquisition/provider/address', 'Max Planck Institute for the History of Science', $index_root, $namespace); |
| | # image acquisition |
| | create_text_path('meta/image-acquisition/device', 'Flatbed Scanner' , $index_root, $namespace); |
| | create_text_path('meta/image-acquisition/image-type', 'Greyscale' , $index_root, $namespace); |
| | create_text_path('meta/image-acquisition/production-comment', 'Raw scans in \'raw\' folder, cleaned pages in \'pages\' folder.' , $index_root, $namespace); |
| } | } |
| # media | # media |
| create_text_path('media-type', 'image', $index_root, $namespace); | create_text_path('media-type', 'image', $index_root, $namespace); |
| create_text_path('meta/content-type', 'scanned document', $index_root, $namespace); | create_text_path('meta/content-type', 'scanned document', $index_root, $namespace); |
| | # access |
| | if ($access_type) { |
| | if ($access_type eq "free") { |
| | create_element_path('meta/access-conditions/access@type=free', $index_root, $namespace); |
| | } else { |
| | my $acc_tag = create_element_path('meta/access-conditions/access@type=institution', $index_root, $namespace); |
| | create_text_path('name', $access_type, $acc_tag, $namespace); |
| | } |
| | } |
| # convert bib entries | # convert bib entries |
| my $cnt = convert_bib($input_node, $index_root, $index_doc); | my $cnt = convert_bib($input_node, $index_root, $index_doc); |