--- scaleomat/scaleomat.pl 2005/01/05 18:38:32 1.8 +++ scaleomat/scaleomat.pl 2005/01/07 16:55:40 1.9 @@ -1,6 +1,6 @@ #!/usr/bin/perl -# Copyright (C) 2003,2004 Robert Casties, IT-Group MPIWG +# Copyright (C) 2003-2005 Robert Casties, IT-Group MPIWG # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the Free @@ -24,7 +24,7 @@ use MPIWGlib; $| = 1; # unblock IO -my $version = "V0.9.5 (ROC 5.1.2005)"; +my $version = "V0.9.6 (ROC 6.1.2005)"; $debug = 0; @@ -38,6 +38,7 @@ my $file_perm = 0664; umask 000; # to make shure we can actually use these perms my $overwrite = 0; # overwrite already converted files +my $synchronise = 0; # delete unmatched destination files # image file extensions and formats my %img_type_ext = ("tif" => "TIFF", "tiff" => "TIFF", "gif" => "GIF", @@ -50,7 +51,7 @@ my $scale_w = 2048; # width in pixel my $scale_h = 2048; # height in pixel my $scale_relative = 0; # scale by relative factor instead of destination size my $jpeg_quality = 75; # default JPEG compression quality -my $use_encoder = 0; # autodetect encoder +my $use_encoder = 0; # false: autodetect encoder # programs to use my $identifier; @@ -206,6 +207,8 @@ sub mmkdir { # # ($type, $width, $height) = identify($filepath) # +# returns type, width and height of the image using ImageMagick's identify +# sub identify { my ($filepath) = @_; my $pictype = ""; @@ -260,6 +263,8 @@ sub identify { # # $type = quickident($filepath); # +# returns image type based on file extension only +# sub quickident { my ($filepath) = @_; logger('DEBUG', "running quickident"); @@ -275,6 +280,8 @@ sub quickident { # # $fact = scalefactor(\$args) # +# returns the necessary scaling factor +# sub scalefactor { my ($args) = @_; @@ -570,7 +577,7 @@ sub scale_tiff_png { # convert file # sub convert_file { - my($srcdir, $filename, $destdir) = @_; + my($srcdir, $filename, $destdir, $filelist) = @_; my $filebase; my $fileext; my $newfn; @@ -588,10 +595,7 @@ sub convert_file { } # get base name and extension - if ($filename =~ /^(.*)\.(\w+)$/) { - $filebase = $1; - $fileext = $2; - } + ($filebase, $fileext) = splitfn($filename); # # quick check if target image exists @@ -605,6 +609,8 @@ sub convert_file { my $newext = $target_ext_type{$pictype}; if ($newext) { $newfn = $filebase . ".$newext"; + logger('DEBUG', "adding $destdir/$newfn'"); + $$filelist{"$destdir/$newfn"} = $filename; # check if file exists if (-f "$destdir/$newfn") { logger('DEBUG', "CONV file exists: $newfn"); @@ -676,7 +682,7 @@ sub convert_file { # Descend recursively through $dirname and work on all files # sub walk_convert_dir { - my($srcdir, $workdir, $destdir) = @_; + my($srcdir, $workdir, $destdir, $filelist) = @_; my $errcnt = 0; my $newfn; @@ -689,12 +695,16 @@ sub walk_convert_dir { closedir WORKDIR; + # check all directories first if ($do_descend) { foreach (sort @dirlist) { + # skip dot-directories next if (/^[.]+$/); + # skip junk directories next if ($junk_files{$_}); + # recurse through the rest if (-d "$srcdir/$workdir/$_") { - walk_convert_dir($srcdir, "$workdir/$_", $destdir); + walk_convert_dir($srcdir, "$workdir/$_", $destdir, $filelist); } } } @@ -712,13 +722,13 @@ sub walk_convert_dir { }; } + # check all files in this directory foreach (sort @dirlist) { - # skip dot-directories - if (/^[.]+.*$/) { - next; - } + # skip dot-files + next if (/^[.]+.*$/); + # try to convert the rest if (-f "$srcdir/$workdir/$_") { - $errcnt += convert_file("$srcdir/$workdir", "$_", "$destdir/$workdir"); + $errcnt += convert_file("$srcdir/$workdir", "$_", "$destdir/$workdir", $filelist); } } @@ -739,13 +749,18 @@ sub walk_convert_dir { # sub convert_dir { my ($srcdir, $workdir, $destdir) = @_; + my %files = (); logger('INFO', "** Converting Scans **"); if (-d "$srcdir/$workdir") { # it's a dirrectory logger('INFO', "Starting in directory '$srcdir/$workdir'"); - walk_convert_dir($srcdir, $workdir, $destdir); + walk_convert_dir($srcdir, $workdir, $destdir, \%files); + if ($synchronise) { + logger('INFO', "Removing unmatched files in '$destdir/$workdir'"); + remove_unmatched_files("$destdir/$workdir", \%files); + } # touch source directory so digilib rescans the thumbnails #logger('DEBUG', "/usr/bin/touch $source_base_dirs[0]/$workdir"); system("/usr/bin/touch '$srcdir/$workdir'"); @@ -766,6 +781,46 @@ sub convert_dir { +# +# remove_unmatched_files($basedir, \%files) +# +# removes all files from $basedir and its subdirectories that are not +# in %files +# +sub remove_unmatched_files { + my ($basedir, $files) = @_; + my $cnt = 0; + + if (! opendir DIR, $basedir) { + logger('ERROR', "directory $basedir doesn't exist (in remove_unmatched_files)"); + return 0; + } + my @dirfiles = readdir DIR; + foreach my $fn (@dirfiles) { + # ignore names starting with a dot + next if ($fn =~ /^\./); + # ignore other silly files + next if ($junk_files{$fn}); + + $cnt++; + my $f = "$basedir/$fn"; + #print "fs_file: \"$f\"\n"; + if (-f $f) { + #print " is file\n"; + logger('DEBUG', "checking '$f'"); + if (not exists($$files{$f})) { + logger('DEBUG', "removing orphaned file $f"); + unlink $f; + } + } elsif (-d _) { + # recurse into directory + $cnt += remove_unmatched_files($f, $files); + # delete if its empty + rmdir $f; + } + } + return $cnt; +} @@ -779,7 +834,8 @@ if ($#ARGV < 3) { print " -scaleto=destination size (S or WxH)\n"; print " -scaleby=magnification factor.\n"; print " -jpegqual=JPEG quality (0-100)\n"; - print " -replace=yes replaces existing files (default=skip).\n"; + print " -replace=yes replaces existing files (default=update).\n"; + print " -sync=yes delete unmatched file in destination (default=ignore).\n"; print " -encoder=tiff_png|tiff_jpeg|tiff_jpeg2\n"; exit 1; } @@ -835,6 +891,12 @@ if ($$args{'replace'}) { $overwrite = 1; } +# Synchronise +if ($$args{'sync'}) { + logger('INFO', "Set to delete unmatched files!"); + $synchronise = 1; +} + # debug if ($$args{'debug'}) { logger('INFO', "Set debug level to $$args{'debug'}!");