Diff for /foxridge-archiver/makemeta-vlp.pl between versions 1.3 and 1.10

Left column: version 1.3, 2006/06/13 15:04:27; right column: version 1.10, 2017/03/16 17:00:43
Line 1 Line 1
 #!/usr/local/bin/perl -w  #!/usr/bin/perl -w
   
 use strict;  use strict;
 use XML::LibXML;  use XML::LibXML;
Line 10  use MPIWGStor; Line 10  use MPIWGStor;
 $|=1;  $|=1;
   
 # program version  # program version
 my $version = "0.2.1 (12.6.2006 ROC)";  my $version = "0.2.7 (27.8.2010 ROC)";
 my $help =   my $help = 
 "use: makemeta-vlp [options] file.xml  "use: makemeta-vlp [options] file.xml
 options:  options:
Line 19  options: Line 19  options:
   -replace  replace existing index files    -replace  replace existing index files
   -online-mode  mode for creating online/permanent files    -online-mode  mode for creating online/permanent files
   -archive-mode  mode for creating archive/data files    -archive-mode  mode for creating archive/data files
     -access=free  adds free access tag for online-mode
     -texttool adds texttool tag for online-mode
 ";  ";
 logger("INFO", "makemeta-vlp $version");  logger("INFO", "makemeta-vlp $version");
   
Line 27  logger("INFO", "makemeta-vlp $version"); Line 29  logger("INFO", "makemeta-vlp $version");
   
 # generic mappings at top level  # generic mappings at top level
 my %gen_map = (  my %gen_map = (
     'Custom2_Language' => 'meta/lang'      'Custom2_Language' => 'meta/lang',
       'productionComment' => 'meta/image-acquisition/production-comment',
       'derivedFrom' => 'derived-from/archive-path'
     );      );
 # sub type switch tag  # sub type switch tag
 my %type_map = (  my %type_map = (
Line 81  my %subtype_map = ( Line 85  my %subtype_map = (
     'Edition' => 'meta/bib/edition',      'Edition' => 'meta/bib/edition',
     'Volume' => 'meta/bib/volume',      'Volume' => 'meta/bib/volume',
     'NumberOfVolumes' => 'meta/bib/number-of-volumes',      'NumberOfVolumes' => 'meta/bib/number-of-volumes',
       'Pages' => 'meta/bib/number-of-pages',
       '#Cover pages only, book sections have been extracted' => 'meta/bib/comment'
       },
       '(Edited Book)' => {
       '_name' => 'edited-book',
       'Author' => 'meta/bib/editor',
       'Title' => 'meta/bib/title',
       'Year' => 'meta/bib/year',
       'Place_Published' => 'meta/bib/city',
       'Publisher' => 'meta/bib/publisher',
       'Edition' => 'meta/bib/edition',
       'Volume' => 'meta/bib/volume',
       'NumberOfVolumes' => 'meta/bib/number-of-volumes',
     'Pages' => 'meta/bib/number-of-pages'      'Pages' => 'meta/bib/number-of-pages'
     },      },
     'Journal Article' => {      'Journal Article' => {
Line 93  my %subtype_map = ( Line 110  my %subtype_map = (
     'Number_Issue' => 'meta/bib/issue',      'Number_Issue' => 'meta/bib/issue',
     'Pages' => 'meta/bib/pages'      'Pages' => 'meta/bib/pages'
     },      },
       '(JournalVolume)' => {
       '_name' => 'journal-volume',
       'SecondaryTitle' => 'meta/bib/title',
       'SecondaryAuthor' => 'meta/bib/editor',
       'Publisher' => 'meta/bib/publisher',
       'Place_Published' => 'meta/bib/city',
       'Year' => 'meta/bib/year',
       'Volume' => 'meta/bib/volume',
       'Pages' => 'meta/bib/number-of-pages',
       '#Cover pages only, articles have been extracted' => 'meta/bib/comment'
       },
       'Journal' => {
       '_name' => 'report',
       'Title' => 'meta/bib/title',
       'SecondaryTitle' => 'meta/bib/institution',
       'Author' => 'meta/bib/author',
       'Place_Published' => 'meta/bib/city',
       'Year' => 'meta/bib/year',
       'Date' => 'meta/bib/date',
       'Pages' => 'meta/bib/pages',
       },
     'Magazine Article' => {      'Magazine Article' => {
     '_name' => 'magazine-article',      '_name' => 'magazine-article',
     'Author' => 'meta/bib/author',      'Author' => 'meta/bib/author',
Line 103  my %subtype_map = ( Line 141  my %subtype_map = (
     'Date' => 'meta/bib/issue-date',      'Date' => 'meta/bib/issue-date',
     'Pages' => 'meta/bib/pages'      'Pages' => 'meta/bib/pages'
     },      },
       'Newspaper Article' => {
       '_name' => 'newspaper-article',
       'Author' => 'meta/bib/author',
       'Title' => 'meta/bib/title',
       'Year' => 'meta/bib/year',
       'Secondary_Title' => 'meta/bib/newspaper',
       'Date' => 'meta/bib/issue-date',
       'Pages' => 'meta/bib/pages'
       },
     'Report' => {      'Report' => {
     '_name' => 'report',      '_name' => 'report',
     'Author' => 'meta/bib/author',      'Author' => 'meta/bib/author',
Line 156  my %lang_map = ( Line 203  my %lang_map = (
     'Japanese' => 'ja',      'Japanese' => 'ja',
     'Dutch' => 'nl',      'Dutch' => 'nl',
     'Spanish' => 'es',      'Spanish' => 'es',
     'Swedish' => 'sv'      'Swedish' => 'sv',
       'Russian' => 'ru',
       'Polish' => 'pl',
       'Greek' => 'el'
     );      );
 # storage fields  # storage fields
 my $arch_id_field = 'ID';  my $arch_id_field = 'ID';
   my $access_free_field = 'online';
   
 #######################################################  #######################################################
 # internal parameters  # internal parameters
Line 195  logger('DEBUG', "online_mode: $online_mo Line 246  logger('DEBUG', "online_mode: $online_mo
 my $archive_mode = (exists $$args{'archive-mode'}) ? $$args{'archive-mode'} : 0;  my $archive_mode = (exists $$args{'archive-mode'}) ? $$args{'archive-mode'} : 0;
 logger('DEBUG', "archive_mode: $archive_mode");  logger('DEBUG', "archive_mode: $archive_mode");
   
   # create texttool tag (online mode only)
   my $texttool = (exists $$args{'texttool'}) ? $$args{'texttool'} : 1;
   logger('DEBUG', "texttool: $texttool");
   # image dir for texttool
   my $texttool_img_dir = "pages";
   
   # access type
   my $access_type = (exists $$args{'access'}) ? $$args{'access'} : "";
   
 # index.meta namespace (not really implemented!)  # index.meta namespace (not really implemented!)
 my $namespace = "";  my $namespace = "";
   
Line 242  sub find_arch_dir { Line 302  sub find_arch_dir {
   
 sub find_permanent_dir {  sub find_permanent_dir {
     my ($input_node) = @_;      my ($input_node) = @_;
     my $online_base = '/mpiwg/online/permanent';      my $online_base = $lib_online_dir;
     my $dest_id = sstrip($input_node->findvalue("fm:$arch_id_field"));      my $dest_id = sstrip($input_node->findvalue("fm:$arch_id_field"));
     if (! $dest_id) {      if (! $dest_id) {
     logger('ERROR', "no ID field for online permanent entry");      logger('ERROR', "no ID field for online permanent entry");
Line 250  sub find_permanent_dir { Line 310  sub find_permanent_dir {
     return;      return;
     }      }
     my $dir = "$online_base/lit$dest_id";      my $dir = "$online_base/lit$dest_id";
       if (-d $dir) {
           logger('DEBUG', "directory $dir exists"); 
     return $dir;      return $dir;
 }  }
       return;
   }
   
   
 sub convert_bib {  sub convert_bib {
Line 383  sub process_fm_entry { Line 447  sub process_fm_entry {
       create_text_path('meta/acquisition/date', stime(time), $index_root, $namespace);        create_text_path('meta/acquisition/date', stime(time), $index_root, $namespace);
       create_text_path('meta/acquisition/provider/provider-id', 'vlp', $index_root, $namespace);        create_text_path('meta/acquisition/provider/provider-id', 'vlp', $index_root, $namespace);
       create_text_path('meta/acquisition/provider/address', 'Max Planck Institute for the History of Science', $index_root, $namespace);        create_text_path('meta/acquisition/provider/address', 'Max Planck Institute for the History of Science', $index_root, $namespace);
       # image acquisition  
       create_text_path('meta/image-acquisition/device', 'Flatbed Scanner' , $index_root, $namespace);  
       create_text_path('meta/image-acquisition/image-type', 'Greyscale' , $index_root, $namespace);  
       create_text_path('meta/image-acquisition/production-comment', 'Raw scans in \'raw\' folder, cleaned pages in \'pages\' folder.' , $index_root, $namespace);  
     }      }
     # media      # media
     create_text_path('media-type', 'image', $index_root, $namespace);      create_text_path('media-type', 'image', $index_root, $namespace);
     create_text_path('meta/content-type', 'scanned document', $index_root, $namespace);      create_text_path('meta/content-type', 'scanned document', $index_root, $namespace);
       # access
       if ($access_type) {
       if ($access_type eq "free") {
           create_element_path('meta/access-conditions/access@type=free', $index_root, $namespace);
       } else {
           my $acc_tag = create_element_path('meta/access-conditions/access@type=institution', $index_root, $namespace);
           create_text_path('name', $access_type, $acc_tag, $namespace);
       }
       } elsif ($online_mode) {
           # read access conditions from "online" field in DB dump
           my $online = sstrip($input_node->findvalue("fm:$access_free_field"));
           if ($online) {
           create_element_path('meta/access-conditions/access@type=free', $index_root, $namespace);
       } else {
           my $acc_tag = create_element_path('meta/access-conditions/access@type=institution', $index_root, $namespace);
           create_text_path('name', 'mpiwg', $acc_tag, $namespace);
       }
       }
   
       # texttool tag with image dir
       if ($online_mode && $texttool) {
       if ( -d "$doc_dir/$texttool_img_dir" ) {
           create_text_path('meta/texttool/image', $texttool_img_dir,$index_root, $namespace);
       } else {
               logger('WARNING', "page image directory missing!");
               $warncnt++;
           }
       }
   
     # convert bib entries      # convert bib entries
     my $cnt = convert_bib($input_node, $index_root, $index_doc);      my $cnt = convert_bib($input_node, $index_root, $index_doc);

Legend: removed from v.1.3 | changed lines | added in v.1.10


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>