--- foxridge-archiver/makemeta-vlp.pl 2006/06/27 15:25:40 1.5 +++ foxridge-archiver/makemeta-vlp.pl 2017/03/16 17:00:43 1.10 @@ -1,4 +1,4 @@ -#!/usr/local/bin/perl -w +#!/usr/bin/perl -w use strict; use XML::LibXML; @@ -10,7 +10,7 @@ use MPIWGStor; $|=1; # program version -my $version = "0.2.3 (27.6.2006 ROC)"; +my $version = "0.2.7 (27.8.2010 ROC)"; my $help = "use: makemeta-vlp [options] file.xml options: @@ -19,6 +19,8 @@ options: -replace replace existing index files -online-mode mode for creating online/permanent files -archive-mode mode for creating archive/data files + -access=free adds free access tag for online-mode + -texttool adds texttool tag for online-mode "; logger("INFO", "makemeta-vlp $version"); @@ -27,7 +29,9 @@ logger("INFO", "makemeta-vlp $version"); # generic mappings at top level my %gen_map = ( - 'Custom2_Language' => 'meta/lang' + 'Custom2_Language' => 'meta/lang', + 'productionComment' => 'meta/image-acquisition/production-comment', + 'derivedFrom' => 'derived-from/archive-path' ); # sub type switch tag my %type_map = ( @@ -117,6 +121,16 @@ my %subtype_map = ( 'Pages' => 'meta/bib/number-of-pages', '#Cover pages only, articles have been extracted' => 'meta/bib/comment' }, + 'Journal' => { + '_name' => 'report', + 'Title' => 'meta/bib/title', + 'SecondaryTitle' => 'meta/bib/institution', + 'Author' => 'meta/bib/author', + 'Place_Published' => 'meta/bib/city', + 'Year' => 'meta/bib/year', + 'Date' => 'meta/bib/date', + 'Pages' => 'meta/bib/pages', + }, 'Magazine Article' => { '_name' => 'magazine-article', 'Author' => 'meta/bib/author', @@ -127,6 +141,15 @@ my %subtype_map = ( 'Date' => 'meta/bib/issue-date', 'Pages' => 'meta/bib/pages' }, + 'Newspaper Article' => { + '_name' => 'newspaper-article', + 'Author' => 'meta/bib/author', + 'Title' => 'meta/bib/title', + 'Year' => 'meta/bib/year', + 'Secondary_Title' => 'meta/bib/newspaper', + 'Date' => 'meta/bib/issue-date', + 'Pages' => 'meta/bib/pages' + }, 'Report' => { '_name' => 'report', 'Author' => 'meta/bib/author', @@ -180,10 +203,14 @@ my %lang_map = ( 'Japanese' => 'ja', 'Dutch' => 'nl', 'Spanish' => 'es', - 'Swedish' => 'sv' + 'Swedish' => 'sv', + 'Russian' => 'ru', + 'Polish' => 'pl', + 'Greek' => 'el' ); # storage fields my $arch_id_field = 'ID'; +my $access_free_field = 'online'; ####################################################### # internal parameters @@ -219,6 +246,15 @@ logger('DEBUG', "online_mode: $online_mo my $archive_mode = (exists $$args{'archive-mode'}) ? $$args{'archive-mode'} : 0; logger('DEBUG', "archive_mode: $archive_mode"); +# create texttool tag (online mode only) +my $texttool = (exists $$args{'texttool'}) ? $$args{'texttool'} : 1; +logger('DEBUG', "texttool: $texttool"); +# image dir for texttool +my $texttool_img_dir = "pages"; + +# access type +my $access_type = (exists $$args{'access'}) ? $$args{'access'} : ""; + # index.meta namespace (not really implemented!) my $namespace = ""; @@ -266,7 +302,7 @@ sub find_arch_dir { sub find_permanent_dir { my ($input_node) = @_; - my $online_base = '/mpiwg/online/permanent'; + my $online_base = $lib_online_dir; my $dest_id = sstrip($input_node->findvalue("fm:$arch_id_field")); if (! $dest_id) { logger('ERROR', "no ID field for online permanent entry"); @@ -274,7 +310,11 @@ sub find_permanent_dir { return; } my $dir = "$online_base/lit$dest_id"; - return $dir; + if (-d $dir) { + logger('DEBUG', "directory $dir exists"); + return $dir; + } + return; } @@ -407,14 +447,38 @@ sub process_fm_entry { create_text_path('meta/acquisition/date', stime(time), $index_root, $namespace); create_text_path('meta/acquisition/provider/provider-id', 'vlp', $index_root, $namespace); create_text_path('meta/acquisition/provider/address', 'Max Planck Institute for the History of Science', $index_root, $namespace); - # image acquisition - create_text_path('meta/image-acquisition/device', 'Flatbed Scanner' , $index_root, $namespace); - create_text_path('meta/image-acquisition/image-type', 'Greyscale' , $index_root, $namespace); - create_text_path('meta/image-acquisition/production-comment', 'Raw scans in \'raw\' folder, cleaned pages in \'pages\' folder.' , $index_root, $namespace); } # media create_text_path('media-type', 'image', $index_root, $namespace); create_text_path('meta/content-type', 'scanned document', $index_root, $namespace); + # access + if ($access_type) { + if ($access_type eq "free") { + create_element_path('meta/access-conditions/access@type=free', $index_root, $namespace); + } else { + my $acc_tag = create_element_path('meta/access-conditions/access@type=institution', $index_root, $namespace); + create_text_path('name', $access_type, $acc_tag, $namespace); + } + } elsif ($online_mode) { + # read access conditions from "online" field in DB dump + my $online = sstrip($input_node->findvalue("fm:$access_free_field")); + if ($online) { + create_element_path('meta/access-conditions/access@type=free', $index_root, $namespace); + } else { + my $acc_tag = create_element_path('meta/access-conditions/access@type=institution', $index_root, $namespace); + create_text_path('name', 'mpiwg', $acc_tag, $namespace); + } + } + + # texttool tag with image dir + if ($online_mode && $texttool) { + if ( -d "$doc_dir/$texttool_img_dir" ) { + create_text_path('meta/texttool/image', $texttool_img_dir,$index_root, $namespace); + } else { + logger('WARNING', "page image directory missing!"); + $warncnt++; + } + } # convert bib entries my $cnt = convert_bib($input_node, $index_root, $index_doc);