| version 1.5, 2005/08/23 15:01:08 | version 1.9, 2006/05/16 19:56:32 |
|---|---|
| Line 3 | Line 3 |
| use strict; | use strict; |
| use XML::LibXML; | use XML::LibXML; |
| use lib '/usr/local/mpiwg/archive_devel'; | use lib '/usr/local/mpiwg/archive'; |
| use MPIWGStor; | use MPIWGStor; |
| # make output unbuffered | # make output unbuffered |
| $|=1; | $|=1; |
| # program version | # program version |
| my $version = "0.2.1 (15.8.2005)"; | my $version = "0.2.4 (16.5.2006 ROC)"; |
| my $help = | my $help = |
| "use: makemeta-lib [options] file.xml | "use: makemeta-lib [options] file.xml |
| options: | options: |
| -debug show debugging info | -debug show debugging info |
| -dry-run simulate, dont'do anything | -dry-run simulate, dont'do anything |
| -cw-mode mode for copying einstein_cq documents | -online-mode mode for creating online/permanent files |
| -cw-mode mode for copying einstein_cw archive documents | |
| -digifiles-mode mode for copying files from digifiles | -digifiles-mode mode for copying files from digifiles |
| -map-file=mapfile.xml digilib mapping file (for digifiles mode) | -map-file=mapfile.xml digilib mapping file (for digifiles mode) |
| -access=free adds free access tag (use access=mpiwg for restricted access) | |
| "; | "; |
| logger("INFO", "makemeta-lib $version"); | logger("INFO", "makemeta-lib $version"); |
| Line 135 $debug = (exists $$args{'debug'}) ? $$ar | Line 137 $debug = (exists $$args{'debug'}) ? $$ar |
| my $dry_run = (exists $$args{'dry-run'}) ? $$args{'dry-run'} : 0; | my $dry_run = (exists $$args{'dry-run'}) ? $$args{'dry-run'} : 0; |
| logger('DEBUG', "dry-run: $dry_run"); | logger('DEBUG', "dry-run: $dry_run"); |
| # use online mode | |
| my $online_mode = (exists $$args{'online-mode'}) ? $$args{'online-mode'} : 0; | |
| logger('DEBUG', "online_mode: $online_mode"); | |
| # use einstein-cw mode | # use einstein-cw mode |
| my $cw_mode = (exists $$args{'cw-mode'}) ? $$args{'cw-mode'} : 0; | my $cw_mode = (exists $$args{'cw-mode'}) ? $$args{'cw-mode'} : 0; |
| logger('DEBUG', "cw_mode: $cw_mode"); | logger('DEBUG', "cw_mode: $cw_mode"); |
| Line 148 logger('DEBUG', "map_file_name: $map_fil | Line 154 logger('DEBUG', "map_file_name: $map_fil |
| my $mapping_doc; | my $mapping_doc; |
| my $mapping_root; | my $mapping_root; |
| # access type | |
| my $access_type = (exists $$args{'access'}) ? $$args{'access'} : ""; | |
| # index.meta namespace (not really implemented!) | # index.meta namespace (not really implemented!) |
| my $namespace = ""; | my $namespace = ""; |
| Line 266 sub find_cw_dir { | Line 275 sub find_cw_dir { |
| return; | return; |
| } | } |
| sub find_permanent_dir { | |
| my ($input_node) = @_; | |
| my $online_base = '/mpiwg/online/permanent'; | |
| my $src_dir = find_online_dir($input_node, $online_base, 'pageimg'); | |
| my $dest_id = sstrip($input_node->findvalue("fm:$online_id_field")); | |
| if ((! $dest_id)||(! $src_dir)) { | |
| logger('ERROR', "no ID field for online permanent entry"); | |
| $errcnt++; | |
| return; | |
| } | |
| my $dir = "$online_base/$src_dir"; | |
| return $dir; | |
| } | |
| # | # |
| # $dir = find_online_dir($input_node, $base_dir, $page_dir) | # $dir = find_online_dir($input_node, $base_dir, $page_dir) |
| # | # |
| Line 289 sub find_online_dir { | Line 312 sub find_online_dir { |
| } | } |
| #logger('DEBUG', "online_dir1: $online_dir"); | #logger('DEBUG', "online_dir1: $online_dir"); |
| if ($online_dir) { | if ($online_dir) { |
| $online_dir =~ s/\/$//; # strip ending slashes | |
| if ($page_dir) { | if ($page_dir) { |
| $online_dir =~ s/\/${page_dir}$//; | $online_dir =~ s/\/${page_dir}$//; |
| } | } |
| Line 397 sub process_fm_entry { | Line 421 sub process_fm_entry { |
| $index_root->addChild($index_doc->createAttributeNS($namespace, 'version', '1.1')); | $index_root->addChild($index_doc->createAttributeNS($namespace, 'version', '1.1')); |
| $index_root->addChild($index_doc->createAttributeNS($namespace, 'type', 'MPIWG')); | $index_root->addChild($index_doc->createAttributeNS($namespace, 'type', 'MPIWG')); |
| $index_doc->setDocumentElement($index_root); | $index_doc->setDocumentElement($index_root); |
| my $derived_from = ""; | |
| # try to find the document directory | # try to find the document directory |
| my $doc_dir = ""; | my $doc_dir = ""; |
| if ($cw_mode) { | if ($online_mode) { |
| $doc_dir = find_permanent_dir($input_node); | |
| $derived_from = find_arch_dir($input_node); | |
| } elsif ($cw_mode) { | |
| $doc_dir = find_cw_dir($input_node); | $doc_dir = find_cw_dir($input_node); |
| } elsif ($digifiles_mode) { | } elsif ($digifiles_mode) { |
| $doc_dir = find_digifiles_dir($input_node); | $doc_dir = find_digifiles_dir($input_node); |
| Line 428 sub process_fm_entry { | Line 456 sub process_fm_entry { |
| # media | # media |
| create_text_path('media-type', 'image', $index_root, $namespace); | create_text_path('media-type', 'image', $index_root, $namespace); |
| create_text_path('meta/content-type', 'scanned document', $index_root, $namespace); | create_text_path('meta/content-type', 'scanned document', $index_root, $namespace); |
| # derived-from | |
| if ($derived_from) { | |
| create_text_path('derived-from/archive-path', $derived_from, $index_root, $namespace); | |
| } | |
| # access | |
| if ($access_type) { | |
| if ($access_type eq "free") { | |
| create_element_path('access-conditions/access@type=free', $index_root, $namespace); | |
| } else { | |
| my $acc_tag = create_element_path('access-conditions/access@type=institution', $index_root, $namespace); | |
| create_text_path('name', $access_type, $acc_tag, $namespace); | |
| } | |
| } | |
| # convert bib entries | # convert bib entries |
| my $cnt = convert_bib($input_node, $index_root, $index_doc); | my $cnt = convert_bib($input_node, $index_root, $index_doc); |