changeset 16:8eee361fdec8

weiter arbeit am digifiles modus
author casties
date Tue, 23 Aug 2005 17:01:08 +0200
parents 8d2f0586eca6
children 6c5c7743acb1
files MPIWGStor.pm makemeta-lib.pl
diffstat 2 files changed, 154 insertions(+), 22 deletions(-) [+]
line wrap: on
line diff
--- a/MPIWGStor.pm	Mon Jun 20 17:21:30 2005 +0200
+++ b/MPIWGStor.pm	Tue Aug 23 17:01:08 2005 +0200
@@ -12,7 +12,7 @@
 	    $file_perm);
 use Exporter;
 
-$VERSION = 0.6; #  ROC 26.5.2005
+$VERSION = 0.6.1; #  ROC 8.8.2005
 
 @ISA = qw(Exporter);
 
@@ -145,7 +145,7 @@
     if ($fn =~ /^(.*)\/([^\/]+)$/) {
         return ($2, $1);
     }
-    return $fn;
+    return ($fn, '.'); # only file name
 }
 
 
--- a/makemeta-lib.pl	Mon Jun 20 17:21:30 2005 +0200
+++ b/makemeta-lib.pl	Tue Aug 23 17:01:08 2005 +0200
@@ -3,14 +3,23 @@
 use strict;
 use XML::LibXML;
 
-use lib '/usr/local/mpiwg/archive';
+use lib '/usr/local/mpiwg/archive_devel';
 use MPIWGStor;
 
 # make output unbuffered
 $|=1;
 
 # program version
-my $version = "0.1.1 (1.6.2005)";
+my $version = "0.2.1 (15.8.2005)";
+my $help = 
+"use: makemeta-lib [options] file.xml
+options:
+  -debug  show debugging info
+  -dry-run  simulate, dont'do anything
+  -cw-mode  mode for copying einstein_cq documents
+  -digifiles-mode  mode for copying files from digifiles
+  -map-file=mapfile.xml  digilib mapping file (for digifiles mode)
+";
 logger("INFO", "makemeta-lib $version");
 
 ###########################################
@@ -100,6 +109,7 @@
 # storage fields
 my $arch_id_field = 'ID_Archive';
 my $online_url_field = 'URL';
+my $online_id_field = 'ID_OnlinePermanent';
 
 #######################################################
 # internal parameters
@@ -108,15 +118,35 @@
 # storage
 my $lib_arch_dir = '/mpiwg/archive/data/library';
 my $lib_online_dir = '/mpiwg/online/permanent';
+my $lib_digilib_path = 'permanent';
+my $digifiles_dir = "/net/digifiles.mpiwg-berlin.mpg.de/Volumes/raid";
 
 # read command line parameters
 my $args = MPIWGStor::parseargs;
+if (! scalar(%$args)) {
+    print $help, "\n";
+    exit 1;
+}
 
 # debug level
 $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
 
+# simulate action only
+my $dry_run = (exists $$args{'dry-run'}) ? $$args{'dry-run'} : 0;
+logger('DEBUG', "dry-run: $dry_run");
+
 # use einstein-cw mode
 my $cw_mode = (exists $$args{'cw-mode'}) ? $$args{'cw-mode'} : 0;
+logger('DEBUG', "cw_mode: $cw_mode");
+
+# use digifiles mode
+my $digifiles_mode = (exists $$args{'digifiles-mode'}) ? $$args{'digifiles-mode'} : 0;
+logger('DEBUG', "digifiles_mode: $digifiles_mode");
+# digilib mapping file
+my $map_file_name = (exists $$args{'map-file'}) ? $$args{'map-file'} : "";
+logger('DEBUG', "map_file_name: $map_file_name");
+my $mapping_doc;
+my $mapping_root;
 
 # index.meta namespace (not really implemented!)
 my $namespace = "";
@@ -146,41 +176,126 @@
 # subroutines
 #
 
+
+sub add_digilib_mapping {
+    my ($src_dir, $dest_dir) = @_;
+    my $elem = $mapping_root->addNewChild($namespace, 'mapping');
+    $elem->addChild($mapping_doc->createAttributeNS($namespace, 'link', $src_dir));
+    $elem->addChild($mapping_doc->createAttributeNS($namespace, 'dir', $dest_dir));
+    if ($map_file_name) {
+	write_xml($mapping_doc, $map_file_name);
+    } else {
+	logger('ABORT', "unable to write mapping file!");
+	exit 1;
+    }
+}
+
+sub find_digifiles_dir {
+    my ($input_node) = @_;
+    my $digifiles_base = '/net/digifiles.mpiwg-berlin.mpg.de/Volumes/raid';
+    my $src_dir = find_online_dir($input_node, $digifiles_base, '');
+    if (! $src_dir) {
+	logger('ERROR', "no online directory for digifiles entry");
+	$errcnt++;
+	return;
+    }
+    my $dest_id = sstrip($input_node->findvalue("fm:$online_id_field"));
+    if (! $dest_id) {
+	logger('ERROR', "no ID field for digifiles entry");
+	$errcnt++;
+	return;
+    }
+    my $dir = "$lib_online_dir/library/$dest_id";
+    my $map_dir = "$lib_digilib_path/library/$dest_id";
+    if ($dry_run) {
+	logger('DEBUG', "would move $digifiles_base/$src_dir to $dir");
+	add_digilib_mapping($src_dir, "$map_dir/pageimg");
+	return $dir;
+    } else {
+	logger('INFO', "moving $digifiles_base/$src_dir to $dir");
+	logger('DEBUG', "mkdir $dir/pageimg"); 
+	if (system("mkdir -p $dir/pageimg && chmod -R 0775 $dir") == 0) {
+	    logger('DEBUG', "cp $digifiles_base/$src_dir $dir/pageimg"); 
+	    if (system("cp -rp $digifiles_base/$src_dir/* $dir/pageimg/") == 0) {
+		if (-d "$dir/pageimg") {
+		    logger('DEBUG', "directory $dir OK"); 
+		    add_digilib_mapping($src_dir, "$map_dir/pageimg");
+		    if (system("rm -rf $digifiles_base/$src_dir/* && rm -rf $digifiles_base/$src_dir") == 0) {
+			logger('DEBUG', "directory $digifiles_base/$src_dir removed"); 
+			return $dir;
+		    } else {
+			logger('ERROR', "unable to remove source directory $digifiles_base/$src_dir!");
+			$errcnt++;
+			return $dir;
+		    }
+		}
+	    }
+	}
+	logger('ABORT', "unable to copy directory $src_dir to $dir!");
+	exit 1;
+    }
+    return;
+}
+
 sub find_cw_dir {
     my ($input_node) = @_;
-    my $src_dir = find_online_dir($input_node, '/mpiwg/archive/data/library/inbox/zwischen_backup');
-    my $dest_id = $input_node->findvalue("fm:$arch_id_field");
+    my $cw_base = '/mpiwg/archive/data/library/inbox/zwischen_backup';
+    my $src_dir = find_online_dir($input_node, $cw_base, 'pageimg');
+    my $dest_id = sstrip($input_node->findvalue("fm:$arch_id_field"));
     if (! $dest_id) {
 	logger('ERROR', "no ID field for einstein-cw entry");
 	$errcnt++;
 	return;
     }
     my $dir = "$lib_arch_dir/$dest_id";
-    logger('DEBUG', "moving $src_dir to $dir");
-    if (rename $src_dir, $dir) {
-	if (-d $dir) {
-	    logger('DEBUG', "directory $dir OK"); 
-	    return $dir;
+    if ($dry_run) {
+	logger('DEBUG', "would move $cw_base/$src_dir to $dir");
+	return $dir;
+    } else {
+	logger('DEBUG', "moving $cw_base/$src_dir to $dir");
+	if (rename "$cw_base/$src_dir", $dir) {
+	    if (-d $dir) {
+		logger('DEBUG', "directory $dir OK"); 
+		return $dir;
+	    }
+	} else {
+	    logger('ABORT', "unable to rename directory $cw_base/$src_dir to $dir!");
+	    exit 1;
 	}
-    } else {
-	logger('ABORT', "unable to rename directory $src_dir to $dir!");
-	exit 1;
     }
     return;
 }
 
+#
+# $dir = find_online_dir($input_node, $base_dir, $page_dir)
+#
+# Takes the path from the $online_url_field of the $input_node document
+# and looks in the directory $base_dir for it. Strips $page_dir from the end.
+# Returns the directory path sans $base_dir if it exists
+#
 sub find_online_dir {
-    my ($input_node, $base_dir) = @_;
+    my ($input_node, $base_dir, $page_dir) = @_;
     $base_dir = $lib_online_dir unless ($base_dir);
 
     my $online_url = $input_node->findvalue("fm:$online_url_field");
-    if ($online_url =~ /fn=permanent\/(.+)\/pageimg/) {
-	my $online_dir = $1;
+    logger('DEBUG', "checking URL: $online_url");
+    my $online_dir;
+    if ($online_url =~ /fn=permanent\/(.+)/) {
+	# new style digilib URL
+	$online_dir = $1;
+    } elsif ($online_url =~ /\?([^\+]+)\+/) {
+	# old style digilib URL
+	$online_dir = $1;
+    }
+    #logger('DEBUG', "online_dir1: $online_dir");
+    if ($online_dir) {
+	if ($page_dir) {
+	  $online_dir =~ s/\/${page_dir}$//;
+	}
 	#logger("DEBUG", "dir: $base_dir/$online_dir");
-	my $dir = "$base_dir/$online_dir";
-	if (-d $dir) {
-	    logger('DEBUG', "directory $dir exists"); 
-	    return $dir;
+	if (-d "$base_dir/$online_dir") {
+	    logger('DEBUG', "directory $base_dir/$online_dir exists"); 
+	    return $online_dir;
 	}
     }
     return;
@@ -270,6 +385,7 @@
     foreach my $n ($input_root->findnodes('fm:ROW')) {
 	logger('INFO', "processing entry $cnt ...");
 	process_fm_entry($n);
+	$cnt++;
     }
 }    
 
@@ -286,6 +402,8 @@
     my $doc_dir = "";
     if ($cw_mode) {
 	$doc_dir = find_cw_dir($input_node);
+    } elsif ($digifiles_mode) {
+	$doc_dir = find_digifiles_dir($input_node);
     } else {
 	$doc_dir = find_arch_dir($input_node);
     }
@@ -321,7 +439,12 @@
     }
 
     # write new index.meta file
-    write_xml($index_doc, "$doc_dir/index.meta");
+    if ($dry_run) {
+	logger('DEBUG', "would write $doc_dir/index.meta");
+	logger('DEBUG', $index_doc->toString(1));
+    } else {
+	write_xml($index_doc, "$doc_dir/index.meta");
+    }
 
 }
 
@@ -339,6 +462,15 @@
 my $fm_namespace = $input_root->namespaceURI();
 $input_root->setNamespace($fm_namespace, 'fm', 1);
 
+# create digilib mapping file for digifiles mode
+if ($digifiles_mode) {
+    $mapping_doc = XML::LibXML::Document->createDocument('1.0', 'UTF-8');
+    $mapping_root = $mapping_doc->createElementNS($namespace, 'digilib-aliases');
+    $mapping_doc->setDocumentElement($mapping_root);
+#<mapping link="exp1/archimedes_image_repository/archimedes_large/achil_propo_087_la_1545" dir="permanent/archimedes_repository/large/achil_propo_087_la_1545"/>
+
+}
+
 process_all_fm_entries($input_root);