# HG changeset patch
# User casties
# Date 1124809268 -7200
# Node ID 8eee361fdec8bf470c9a9657ad2a19613a6b3ea5
# Parent  8d2f0586eca69f2f2d1946b44d1c9263adfb77a5
weiter arbeit am digifiles modus

Review notes (line structure restored from a whitespace-collapsed copy;
indentation and blank context lines are reconstructed):
- MPIWGStor.pm: a bare 0.6.1 is parsed as a v-string; use the decimal 0.61.
- makemeta-lib.pl: fix the "dont'do" typo in the help text.
- find_digifiles_dir: run mkdir/chmod/cp/rm through list-form system() so
  paths never pass through a shell; copy "src/." so hidden files are not
  lost when the source is removed; reuse $digifiles_dir instead of a
  duplicate literal.
- find_cw_dir: return early when no source directory was found; otherwise
  rename() would have been applied to the whole inbox directory.
- find_online_dir: stop the new-style match at the next '&' and quote
  $page_dir in the substitution.

diff -r 8d2f0586eca6 -r 8eee361fdec8 MPIWGStor.pm
--- a/MPIWGStor.pm	Mon Jun 20 17:21:30 2005 +0200
+++ b/MPIWGStor.pm	Tue Aug 23 17:01:08 2005 +0200
@@ -12,7 +12,7 @@
             $file_perm);
 use Exporter;
 
-$VERSION = 0.6; # ROC 26.5.2005
+$VERSION = 0.61; # ROC 8.8.2005
 
 @ISA = qw(Exporter);
 
@@ -145,7 +145,7 @@
     if ($fn =~ /^(.*)\/([^\/]+)$/) {
         return ($2, $1);
     }
-    return $fn;
+    return ($fn, '.'); # only file name
 }
 
 
diff -r 8d2f0586eca6 -r 8eee361fdec8 makemeta-lib.pl
--- a/makemeta-lib.pl	Mon Jun 20 17:21:30 2005 +0200
+++ b/makemeta-lib.pl	Tue Aug 23 17:01:08 2005 +0200
@@ -3,14 +3,23 @@
 use strict;
 use XML::LibXML;
 
-use lib '/usr/local/mpiwg/archive';
+use lib '/usr/local/mpiwg/archive_devel';
 use MPIWGStor;
 
 # make output unbuffered
 $|=1;
 
 # program version
-my $version = "0.1.1 (1.6.2005)";
+my $version = "0.2.1 (15.8.2005)";
+my $help =
+"use: makemeta-lib [options] file.xml
+options:
+  -debug                 show debugging info
+  -dry-run               simulate, don't do anything
+  -cw-mode               mode for copying einstein_cq documents
+  -digifiles-mode        mode for copying files from digifiles
+  -map-file=mapfile.xml  digilib mapping file (for digifiles mode)
+";
 logger("INFO", "makemeta-lib $version");
 
 ###########################################
@@ -100,6 +109,7 @@
 # storage fields
 my $arch_id_field = 'ID_Archive';
 my $online_url_field = 'URL';
+my $online_id_field = 'ID_OnlinePermanent';
 
 #######################################################
 # internal parameters
@@ -108,15 +118,35 @@
 # storage
 my $lib_arch_dir = '/mpiwg/archive/data/library';
 my $lib_online_dir = '/mpiwg/online/permanent';
+my $lib_digilib_path = 'permanent';
+my $digifiles_dir = "/net/digifiles.mpiwg-berlin.mpg.de/Volumes/raid";
 
 # read command line parameters
 my $args = MPIWGStor::parseargs;
+if (! scalar(%$args)) {
+    print $help, "\n";
+    exit 1;
+}
 
 # debug level
 $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
 
+# simulate action only
+my $dry_run = (exists $$args{'dry-run'}) ? $$args{'dry-run'} : 0;
+logger('DEBUG', "dry-run: $dry_run");
+
 # use einstein-cw mode
 my $cw_mode = (exists $$args{'cw-mode'}) ? $$args{'cw-mode'} : 0;
+logger('DEBUG', "cw_mode: $cw_mode");
+
+# use digifiles mode
+my $digifiles_mode = (exists $$args{'digifiles-mode'}) ? $$args{'digifiles-mode'} : 0;
+logger('DEBUG', "digifiles_mode: $digifiles_mode");
+# digilib mapping file
+my $map_file_name = (exists $$args{'map-file'}) ? $$args{'map-file'} : "";
+logger('DEBUG', "map_file_name: $map_file_name");
+my $mapping_doc;
+my $mapping_root;
 
 # index.meta namespace (not really implemented!)
 my $namespace = "";
@@ -146,41 +176,156 @@
 
 # subroutines
 #
+
+#
+# add_digilib_mapping($src_dir, $dest_dir)
+#
+# Appends a <mapping link="$src_dir" dir="$dest_dir"/> element to the
+# digilib mapping document and rewrites the mapping file $map_file_name.
+# Aborts the program if no mapping file name was given.
+#
+sub add_digilib_mapping {
+    my ($src_dir, $dest_dir) = @_;
+    my $elem = $mapping_root->addNewChild($namespace, 'mapping');
+    $elem->addChild($mapping_doc->createAttributeNS($namespace, 'link', $src_dir));
+    $elem->addChild($mapping_doc->createAttributeNS($namespace, 'dir', $dest_dir));
+    if ($map_file_name) {
+        write_xml($mapping_doc, $map_file_name);
+    } else {
+        logger('ABORT', "unable to write mapping file!");
+        exit 1;
+    }
+}
+
+#
+# $dir = find_digifiles_dir($input_node)
+#
+# Copies the directory named in the $online_url_field of $input_node from
+# the digifiles volume to $lib_online_dir/library/<ID>/pageimg, records a
+# digilib mapping for it and removes the source directory.
+# Returns the new document directory, or nothing if a field is missing.
+# Only logs the move and records the mapping when $dry_run is set.
+#
+sub find_digifiles_dir {
+    my ($input_node) = @_;
+    my $digifiles_base = $digifiles_dir;
+    my $src_dir = find_online_dir($input_node, $digifiles_base, '');
+    if (! $src_dir) {
+        logger('ERROR', "no online directory for digifiles entry");
+        $errcnt++;
+        return;
+    }
+    my $dest_id = sstrip($input_node->findvalue("fm:$online_id_field"));
+    if (! $dest_id) {
+        logger('ERROR', "no ID field for digifiles entry");
+        $errcnt++;
+        return;
+    }
+    my $dir = "$lib_online_dir/library/$dest_id";
+    my $map_dir = "$lib_digilib_path/library/$dest_id";
+    if ($dry_run) {
+        logger('DEBUG', "would move $digifiles_base/$src_dir to $dir");
+        add_digilib_mapping($src_dir, "$map_dir/pageimg");
+        return $dir;
+    } else {
+        logger('INFO', "moving $digifiles_base/$src_dir to $dir");
+        logger('DEBUG', "mkdir $dir/pageimg");
+        if (system('mkdir', '-p', "$dir/pageimg") == 0 && system('chmod', '-R', '0775', $dir) == 0) {
+            logger('DEBUG', "cp $digifiles_base/$src_dir $dir/pageimg");
+            # copy "src/." so hidden files are copied before the source is removed
+            if (system('cp', '-rp', "$digifiles_base/$src_dir/.", "$dir/pageimg/") == 0) {
+                if (-d "$dir/pageimg") {
+                    logger('DEBUG', "directory $dir OK");
+                    add_digilib_mapping($src_dir, "$map_dir/pageimg");
+                    if (system('rm', '-rf', "$digifiles_base/$src_dir") == 0) {
+                        logger('DEBUG', "directory $digifiles_base/$src_dir removed");
+                        return $dir;
+                    } else {
+                        logger('ERROR', "unable to remove source directory $digifiles_base/$src_dir!");
+                        $errcnt++;
+                        return $dir;
+                    }
+                }
+            }
+        }
+        logger('ABORT', "unable to copy directory $src_dir to $dir!");
+        exit 1;
+    }
+    return;
+}
+
+#
+# $dir = find_cw_dir($input_node)
+#
+# Moves the einstein-cw directory named in the $online_url_field of
+# $input_node to $lib_arch_dir/<ID>. Returns the new directory, or
+# nothing if the source directory or the ID field is missing.
+#
 sub find_cw_dir {
     my ($input_node) = @_;
-    my $src_dir = find_online_dir($input_node, '/mpiwg/archive/data/library/inbox/zwischen_backup');
-    my $dest_id = $input_node->findvalue("fm:$arch_id_field");
+    my $cw_base = '/mpiwg/archive/data/library/inbox/zwischen_backup';
+    my $src_dir = find_online_dir($input_node, $cw_base, 'pageimg');
+    # without a source directory the rename below would move $cw_base itself
+    if (! $src_dir) {
+        logger('ERROR', "no online directory for einstein-cw entry");
+        $errcnt++;
+        return;
+    }
+    my $dest_id = sstrip($input_node->findvalue("fm:$arch_id_field"));
     if (! $dest_id) {
         logger('ERROR', "no ID field for einstein-cw entry");
         $errcnt++;
         return;
     }
     my $dir = "$lib_arch_dir/$dest_id";
-    logger('DEBUG', "moving $src_dir to $dir");
-    if (rename $src_dir, $dir) {
-        if (-d $dir) {
-            logger('DEBUG', "directory $dir OK");
-            return $dir;
+    if ($dry_run) {
+        logger('DEBUG', "would move $cw_base/$src_dir to $dir");
+        return $dir;
+    } else {
+        logger('DEBUG', "moving $cw_base/$src_dir to $dir");
+        if (rename "$cw_base/$src_dir", $dir) {
+            if (-d $dir) {
+                logger('DEBUG', "directory $dir OK");
+                return $dir;
+            }
+        } else {
+            logger('ABORT', "unable to rename directory $cw_base/$src_dir to $dir!");
+            exit 1;
         }
-    } else {
-        logger('ABORT', "unable to rename directory $src_dir to $dir!");
-        exit 1;
     }
     return;
 }
 
+#
+# $dir = find_online_dir($input_node, $base_dir, $page_dir)
+#
+# Takes the path from the $online_url_field of the $input_node document
+# and looks in the directory $base_dir for it. Strips $page_dir from the end.
+# Returns the directory path sans $base_dir if it exists
+#
 sub find_online_dir {
-    my ($input_node, $base_dir) = @_;
+    my ($input_node, $base_dir, $page_dir) = @_;
     $base_dir = $lib_online_dir unless ($base_dir);
 
     my $online_url = $input_node->findvalue("fm:$online_url_field");
-    if ($online_url =~ /fn=permanent\/(.+)\/pageimg/) {
-        my $online_dir = $1;
+    logger('DEBUG', "checking URL: $online_url");
+    my $online_dir;
+    if ($online_url =~ /fn=permanent\/([^&]+)/) {
+        # new style digilib URL
+        $online_dir = $1;
+    } elsif ($online_url =~ /\?([^\+]+)\+/) {
+        # old style digilib URL
+        $online_dir = $1;
+    }
+    #logger('DEBUG', "online_dir1: $online_dir");
+    if ($online_dir) {
+        if ($page_dir) {
+            $online_dir =~ s/\/\Q${page_dir}\E$//;
+        }
         #logger("DEBUG", "dir: $base_dir/$online_dir");
-        my $dir = "$base_dir/$online_dir";
-        if (-d $dir) {
-            logger('DEBUG', "directory $dir exists");
-            return $dir;
+        if (-d "$base_dir/$online_dir") {
+            logger('DEBUG', "directory $base_dir/$online_dir exists");
+            return $online_dir;
         }
     }
     return;
@@ -270,6 +415,7 @@
     foreach my $n ($input_root->findnodes('fm:ROW')) {
         logger('INFO', "processing entry $cnt ...");
         process_fm_entry($n);
+        $cnt++;
     }
 }
 
@@ -286,6 +432,8 @@
     my $doc_dir = "";
     if ($cw_mode) {
         $doc_dir = find_cw_dir($input_node);
+    } elsif ($digifiles_mode) {
+        $doc_dir = find_digifiles_dir($input_node);
     } else {
         $doc_dir = find_arch_dir($input_node);
     }
@@ -321,7 +469,12 @@
     }
 
     # write new index.meta file
-    write_xml($index_doc, "$doc_dir/index.meta");
+    if ($dry_run) {
+        logger('DEBUG', "would write $doc_dir/index.meta");
+        logger('DEBUG', $index_doc->toString(1));
+    } else {
+        write_xml($index_doc, "$doc_dir/index.meta");
+    }
 }
 
 
@@ -339,6 +492,15 @@
 my $fm_namespace = $input_root->namespaceURI();
 $input_root->setNamespace($fm_namespace, 'fm', 1);
 
+# create digilib mapping file for digifiles mode
+if ($digifiles_mode) {
+    $mapping_doc = XML::LibXML::Document->createDocument('1.0', 'UTF-8');
+    $mapping_root = $mapping_doc->createElementNS($namespace, 'digilib-aliases');
+    $mapping_doc->setDocumentElement($mapping_root);
+#
+
+}
+
 process_all_fm_entries($input_root);
 
 