#!/usr/bin/perl # Copyright (C) 2003,2004 Robert Casties, IT-Group MPIWG # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the Free # Software Foundation; either version 2 of the License, or (at your option) # any later version. # # Please read license.txt for the full details. A copy of the GPL may be found # at http://www.gnu.org/copyleft/lgpl.html # # You should have received a copy of the GNU General Public License along with # this program; if not, write to the Free Software Foundation, Inc., 59 Temple # Place, Suite 330, Boston, MA 02111-1307 USA use strict; use sigtrap qw(die normal-signals); # MPIWG libraries use lib '/usr/local/mpiwg/scripts'; use MPIWGlib; $| = 1; # unblock IO my $version = "V0.9.5 (ROC 5.1.2005)"; $debug = 0; my $simulate = 0; my $do_descend = 1; my $dir_perm = 0775; my $file_perm = 0664; umask 000; # to make shure we can actually use these perms my $overwrite = 0; # overwrite already converted files # image file extensions and formats my %img_type_ext = ("tif" => "TIFF", "tiff" => "TIFF", "gif" => "GIF", "jpg" => "JPEG", "png" => "PNG", "dcr" => "RAW"); # destination image file extensions my %target_ext_type = ("TIFF" => "jpg", "JPEG" => "jpg"); # default scale settings my $scale_w = 2048; # width in pixel my $scale_h = 2048; # height in pixel my $scale_relative = 0; # scale by relative factor instead of destination size my $jpeg_quality = 75; # default JPEG compression quality my $use_encoder = 0; # autodetect encoder # programs to use my $identifier; my $tiffinfo; my $jpegloader; my $tiffloader; my $quantizer; my $scaler; my $jpegwriter; my $pngwriter; my $converter; ########################################################################## # subroutines # # # checksoft() # # returns if all necessary external programs are installed # sub checksoft { my @softdirs = ("/usr/X11R6/bin", "/usr/bin", "/usr/local/bin"); $identifier = findfile(\@softdirs, "identify") or die("ABORT: neccessary external program not found: identify"); $tiffinfo = findfile(\@softdirs, "tiffinfo") or die("ABORT: neccessary external program not found: tiffinfo"); $jpegloader = findfile(\@softdirs, "jpegtopnm") or die("ABORT: neccessary external program not found: jpegtopnm"); $tiffloader = findfile(\@softdirs, "tifftopnm") or die("ABORT: neccessary external program not found: tifftopnm"); $quantizer = findfile(\@softdirs, "pbmtopgm") or die("ABORT: neccessary external program not found: pbmtopgm"); $scaler = findfile(\@softdirs, "pnmscale") or die("ABORT: neccessary external program not found: pnmscale"); $jpegwriter = findfile(\@softdirs, "ppmtojpeg") or die("ABORT: neccessary external program not found: ppmtojpeg"); $pngwriter = findfile(\@softdirs, "pnmtopng") or die("ABORT: neccessary external program not found: pnmtopng"); $converter = findfile(\@softdirs, "convert") or die("ABORT: neccessary external program not found: convert"); } # # findir(\@basedirs, $subdir) # # check directories in @basedirs+$subdir and return the first existing basedir # sub findir { my($dirlist, $subdir) = @_; foreach my $dir (@$dirlist) { if (-d "$dir/$subdir") { return "$dir"; } } return; } # # findfile(\@basedirs, $filename) # # check @basedirs+$filename and return the first existing file # sub findfile { my($dirlist, $fn) = @_; foreach my $dir (@$dirlist) { if (-f "$dir/$fn") { return "$dir/$fn"; } } return; } # # $p = cleanpath($path, $startslash). # # returns a pathname with trailing and starting slash removed (if # $startslash is true the starting slash is not removed) # sub cleanpath { my ($path, $startslash) = @_; if ($path =~ /^(\/)*(.*?)\/*$/) { if ($startslash) { return $1 . $2; } else { return $2; } } return $path; } # # ($basename, $extension) = splitfn($filename) # # split filename into base and (lowercase) extension # sub splitfn { my ($filepath) = @_; if ($filepath =~ /^(.*)\.(\w+)$/) { return ($1, lc($2)); } return; } # # mmkdir($dirname) # # create directory recursively and check permissions # sub mmkdir { my($dirname) = @_; my $newdir; # does the directory already exist? if (-d $dirname) { chmod $dir_perm, $dirname or do { logger('WARNING', "unable to change permission on $dirname!"); }; return 1; } # split directory name by levels my @dirlist = split /\//, $dirname; my @newlist = (); my $dir = join("/", @dirlist); # test backwards which directories exist while (not -d $dir) { last unless ($dir); # move missing elements from the end of @dirlist to @newlist unshift @newlist, pop @dirlist; $dir = join("/", @dirlist); } # create missing directories level by level foreach $newdir (@newlist) { push @dirlist, $newdir; $dir = join("/", @dirlist); mkdir "$dir", $dir_perm or do { logger('ERROR', "unable to create $dir!"); return 0; } } return 1; } # # ($type, $width, $height) = identify($filepath) # sub identify { my ($filepath) = @_; my $pictype = ""; my $picwidth = 0; my $picheight = 0; my $bitdepth = 0; # use quickident first $pictype = quickident($filepath); # optimized tiff identification if (($pictype)&&($pictype eq 'TIFF')) { logger('DEBUG', "running tiffinfo $tiffinfo"); if (open(IDENT, "nice -10 $tiffinfo '$filepath' 2>/dev/null |")) { while () { chomp; # we take the biggest values, because embedded thumbnails # may also show up if (/Image Width:\s*(\d+)\s*Image Length:\s*(\d+)/) { $picwidth = $1 if ($1 > $picwidth); $picheight = $2 if ($2 > $picheight); next; } if (/Bits\/Sample:\s*(\d+)/) { $bitdepth = $1 if ($1 > $bitdepth); next; } } if ($picwidth) { logger('DEBUG', "TIFF $picwidth x $picheight"); return ($pictype, $picwidth, $picheight, $bitdepth); } } } # run ident to get image type and dimensions logger('DEBUG', "running identifier $identifier"); if (open(IDENT, "nice -10 $identifier -ping -format '%w %h %m\n' '$filepath' 2>/dev/null |")) { my @id = ; my $picinfo = $id[0]; close IDENT; chomp $picinfo; logger('DEBUG', "PIC is '$picinfo'"); if ($picinfo =~ /^(\d+) (\d+) (\w+)$/) { $picwidth = $1; $picheight = $2; $pictype = $3; } } else { logger('DEBUG', "unable to identify $filepath!"); } return ($pictype, $picwidth, $picheight, $bitdepth); } # # $type = quickident($filepath); # sub quickident { my ($filepath) = @_; logger('DEBUG', "running quickident"); # look at file extension my ($filebase, $fileext) = splitfn($filepath); if ($fileext) { return $img_type_ext{$fileext}; } return; } # # $fact = scalefactor(\$args) # sub scalefactor { my ($args) = @_; my $srcdir = $$args{'srcdir'}; my $filename = $$args{'filename'}; my $scale_w = $$args{'scale_w'}; my $scale_h = $$args{'scale_h'}; my $scale_rel = $$args{'scale_rel'}; my $scale = 0; if ($scale_rel) { # scale relative -- no size needed, only scaling factor $scale = $scale_w; } else { # scale to size -- size needed my $pictype = $$args{'pictype'}; my $picwidth = $$args{'picwidth'}; my $picheight = $$args{'picheight'}; if (! $picwidth) { # no size yet - identify ($pictype, $picwidth, $picheight) = identify("$srcdir/$filename"); if ((! $pictype)||($picwidth == 0)||($picheight == 0)) { logger('ERROR', "unable to identify $srcdir/$filename!"); return 0; } # save values $$args{'pictype'} = $pictype; $$args{'picwidth'} = $picwidth; $$args{'picheight'} = $picheight; } # calculate x and y scaling factors my $scale_x = $scale_w / $picwidth; my $scale_y = $scale_h / $picheight; # take the smallest if ($scale_x < $scale_y) { $scale = $scale_x; logger('DEBUG', "PIC scale to width"); } else { $scale = $scale_y; logger('DEBUG', "PIC scale to height"); } if ($scale >= 1) { $scale = 1; logger('DEBUG', "PIC is already smaller"); # continue since we may have to convert } } return $scale; } # # $error = scale_jpeg(\$args); # # scale JPEG images to JPEG using netpbm tools # # args needed: $srcdir, $filename, $destdir, # $scale_w, $scale_h, $scale_rel, $picwidth, $picheight # sub scale_jpeg { my ($args) = @_; my $srcdir = $$args{'srcdir'}; my $filename = $$args{'filename'}; my $destdir = $$args{'destdir'}; my $scale_w = $$args{'scale_w'}; my $scale_h = $$args{'scale_h'}; my $scale_rel = $$args{'scale_rel'}; my $scaleopt; # convert jpg -> jpg my ($basename, $fileext) = splitfn($filename); my $newfn = $basename . ".jpg"; logger('INFO', "Convert(jpg): $filename -> $newfn"); return 1 if ($simulate); $scaleopt = scalefactor($args); if (!$scaleopt) { logger('ERROR', "unable to calculate scaling options!"); return 1; } if ($scaleopt == 1) { # is already smaller logger('DEBUG', "PIC is smaller and JPEG - ignoring"); return 0; } # convert logger('DEBUG', "nice -10 $jpegloader '$srcdir/$filename' 2>/dev/null | nice -10 $scaler $scaleopt 2>/dev/null | nice -10 $jpegwriter > '$destdir/$newfn' 2>/dev/null"); return 0 if ($simulate); if (system("nice -10 $jpegloader '$srcdir/$filename' 2>/dev/null | nice -10 $scaler $scaleopt 2>/dev/null | nice -10 $jpegwriter > '$destdir/$newfn' 2>/dev/null") != 0) { logger('ERROR', "error converting '$srcdir/$filename'!"); if (! -s "$destdir/$newfn") { # file broken (size 0) logger('ERROR', "created file '$destdir/$newfn' broken -- will be removed!"); unlink "$destdir/$newfn"; } return 1; } # change permissions chmod $file_perm, "$destdir/$newfn" or logger('WARNING', "unable to set permission on '$destdir/$newfn'"); if (! -s "$destdir/$newfn") { # file broken (size 0) logger('ERROR', "created file '$destdir/$newfn' broken -- will be removed!"); unlink "$destdir/$newfn"; return 1; } return 0; } # # $error = scale_tiff_jpeg2(\$args); # # scale TIFF images to JPEG using ImageMagick convert # # args needed: $srcdir, $filename, $destdir, # $scale_w, $scale_h, $scale_rel, $picwidth, $picheight # sub scale_tiff_jpeg2 { my ($args) = @_; my $srcdir = $$args{'srcdir'}; my $filename = $$args{'filename'}; my $destdir = $$args{'destdir'}; my $scale_w = $$args{'scale_w'}; my $scale_h = $$args{'scale_h'}; my $scale_rel = $$args{'scale_rel'}; my $scaleopt; my ($basename, $fileext) = splitfn($filename); my $newfn = $basename . ".jpg"; logger('INFO', "Convert(tiff2): $filename -> $newfn"); if ($scale_rel) { my $per_scale = 100 * $scale_w; logger('DEBUG', "nice -10 $converter -quality $jpeg_quality -scale $per_scale\% '$srcdir/$filename' '$destdir/$newfn' 2>/dev/null"); return 0 if ($simulate); if (system("nice -10 $converter -quality $jpeg_quality -scale $per_scale\% '$srcdir/$filename' '$destdir/$newfn' 2>/dev/null\n") != 0) { logger('ERROR', "error converting '$srcdir/$filename'!"); return 1; } } else { logger('DEBUG', "nice -10 $converter -quality $jpeg_quality -scale ${scale_w}x${scale_h} '$srcdir/$filename' '$destdir/$newfn' 2>/dev/null"); return 0 if ($simulate); if (system("nice -10 $converter -quality $jpeg_quality -scale ${scale_w}x${scale_h} '$srcdir/$filename' '$destdir/$newfn' 2>/dev/null\n") != 0) { logger('ERROR', "error converting '$srcdir/$filename'!"); if (! -s "$destdir/$newfn") { # file broken (size 0) logger('ERROR', "created file '$destdir/$newfn' broken -- will be removed!"); unlink "$destdir/$newfn"; } return 1; } } # change permissions chmod $file_perm, "$destdir/$newfn" or logger('WARNING', "unable to set permission on '$destdir/$newfn'"); return 0; } # # $error = scale_tiff_jpeg(\$args); # # scale TIFF images to JPEG using netpbm tools # # args needed: $srcdir, $filename, $destdir, # $scale_w, $scale_h, $scale_rel, $picwidth, $picheight # sub scale_tiff_jpeg { my ($args) = @_; my $srcdir = $$args{'srcdir'}; my $filename = $$args{'filename'}; my $destdir = $$args{'destdir'}; my $bitdepth = $$args{'bitdepth'}; my $scale_w = $$args{'scale_w'}; my $scale_h = $$args{'scale_h'}; my $scale_rel = $$args{'scale_rel'}; my $scaleopt; # convert jpg -> jpg my ($basename, $fileext) = splitfn($filename); my $newfn = $basename . ".jpg"; logger('INFO', "Convert(tiff1): $filename -> $newfn"); return 1 if ($simulate); $scaleopt = scalefactor($args); if (!$scaleopt) { logger('ERROR', "unable to calculate scaling options!"); return 1; } # convert my $cmd = "nice -10 $tiffloader \'$srcdir/$filename\' 2>/dev/null "; if ($bitdepth == 1) { # antialiasing bilevel images $cmd .= "| nice -10 $quantizer 2 2 2>/dev/null "; } $cmd .= "| nice -10 $scaler $scaleopt 2>/dev/null | nice -10 $jpegwriter --quality $jpeg_quality > '$destdir/$newfn' 2>/dev/null"; logger('DEBUG', "$cmd"); return 0 if ($simulate); if (system($cmd) != 0) { logger('ERROR', "error converting '$srcdir/$filename'!"); if (! -s "$destdir/$newfn") { # file broken (size 0) logger('ERROR', "created file '$destdir/$newfn' broken -- will be removed!"); unlink "$destdir/$newfn"; } return 1; } # change permissions chmod $file_perm, "$destdir/$newfn" or logger('WARNING', "unable to set permission on '$destdir/$newfn'"); return 0; } # # $error = scale_tiff_png(\$args); # # scale TIFF images to PNG using netpbm tools # # args needed: $srcdir, $filename, $destdir, # $scale_w, $scale_h, $scale_rel, $picwidth, $picheight # sub scale_tiff_png { my ($args) = @_; my $srcdir = $$args{'srcdir'}; my $filename = $$args{'filename'}; my $destdir = $$args{'destdir'}; my $bitdepth = $$args{'bitdepth'}; my $scale_w = $$args{'scale_w'}; my $scale_h = $$args{'scale_h'}; my $scale_rel = $$args{'scale_rel'}; my $scaleopt; # convert tif -> png my ($basename, $fileext) = splitfn($filename); my $newfn = $basename . ".png"; logger('INFO', "Convert(tiff3): $filename -> $newfn"); $scaleopt = scalefactor($args); if (!$scaleopt) { logger('ERROR', "unable to calculate scaling options!"); return 1; } # convert my $cmd = "nice -10 $tiffloader \'$srcdir/$filename\' 2>/dev/null "; if ($bitdepth == 1) { # antialiasing bilevel images $cmd .= "| nice -10 $quantizer 2 2 2>/dev/null "; } $cmd .= "| nice -10 $scaler $scaleopt 2>/dev/null | nice -10 $pngwriter > '$destdir/$newfn' 2>/dev/null"; logger('DEBUG', "$cmd"); return 0 if ($simulate); if (system($cmd) != 0) { logger('ERROR', "error converting '$srcdir/$filename'!"); if (! -s "$destdir/$newfn") { # file broken (size 0) logger('ERROR', "created file '$destdir/$newfn' broken -- will be removed!"); unlink "$destdir/$newfn"; } return 1; } # change permissions chmod $file_perm, "$destdir/$newfn" or logger('WARNING', "unable to set permission on '$destdir/$newfn'"); return 0; } # # $error = convert_file($srcdir, $filename, $destdir); # # convert file # sub convert_file { my($srcdir, $filename, $destdir) = @_; my $filebase; my $fileext; my $newfn; my $pictype; my $picwidth; my $picheight; my $bitdepth; my $error = 0; logger('DEBUG', "convert_file ($srcdir, $filename, $destdir)"); if (not (( -f "$srcdir/$filename") && (-r _))) { logger('ERROR', "unable to read file '$srcdir/$filename'"); return 1; } # get base name and extension if ($filename =~ /^(.*)\.(\w+)$/) { $filebase = $1; $fileext = $2; } # # quick check if target image exists # $pictype = quickident("$srcdir/$filename"); if ($pictype) { if ($pictype eq "RAW") { logger('DEBUG', "skipping raw file '$srcdir/$filename'"); return 0; } my $newext = $target_ext_type{$pictype}; if ($newext) { $newfn = $filebase . ".$newext"; # check if file exists if (-f "$destdir/$newfn") { logger('DEBUG', "CONV file exists: $newfn"); if (! $overwrite) { # compare age with source file if (-M "$destdir/$newfn" > -M "$srcdir/$filename") { logger('DEBUG', "CONV file is older: $newfn"); } else { logger('INFO', "File already converted: $newfn"); return 0; } } } } else { logger('DEBUG', "target extension for $pictype unknown!"); } } else { # quick ident failed -- do it slowly ($pictype, $picwidth, $picheight, $bitdepth) = identify("$srcdir/$filename"); if ((! $pictype)||($picwidth == 0)||($picheight == 0)) { logger('WARNING', "unknown file type '$srcdir/$filename'"); return 0; } } # collect arguments for the conversion my %args; $args{'srcdir'} = $srcdir; $args{'destdir'} = $destdir; $args{'filename'} = $filename; $args{'pictype'} = $pictype; $args{'picwidth'} = $picwidth; $args{'picheight'} = $picheight; $args{'bitdepth'} = $bitdepth; $args{'srcdir'} = $srcdir; $args{'scale_w'} = $scale_w; $args{'scale_h'} = $scale_h; $args{'scale_rel'} = $scale_relative; # decide conversion based on image type and encoding preferences if ($pictype eq 'JPEG') { $args{'jpeg_qual'} = $jpeg_quality; #default encoder $error = scale_jpeg(\%args); } elsif ($pictype eq 'TIFF') { if ($use_encoder) { # use specific encoder if ($use_encoder eq 'tiff_jpeg') { $error = scale_tiff_jpeg(\%args); } elsif ($use_encoder eq 'tiff_jpeg2') { $error = scale_tiff_jpeg2(\%args); } elsif ($use_encoder eq 'tiff_png') { $error = scale_tiff_png(\%args); } } else { # default $error = scale_tiff_jpeg(\%args); } } else { logger('WARNING', "unknown file type: $pictype ($srcdir/$filename)"); } return $error; } # # walk_convert_dir($dirname) # # Descend recursively through $dirname and work on all files # sub walk_convert_dir { my($srcdir, $workdir, $destdir) = @_; my $errcnt = 0; my $newfn; opendir WORKDIR, "$srcdir/$workdir" or do { logger('ERROR', "Unable to open directory $srcdir/$workdir!"); return 0; }; my @dirlist = readdir(WORKDIR); closedir WORKDIR; if ($do_descend) { foreach (sort @dirlist) { next if (/^[.]+$/); next if ($junk_files{$_}); if (-d "$srcdir/$workdir/$_") { walk_convert_dir($srcdir, "$workdir/$_", $destdir); } } } logger('INFO', "Working on $workdir"); logger('INFO', "Reading from $srcdir/$workdir."); logger('INFO', "Writing to $destdir/$workdir"); # create destination directory if (not ($simulate)) { mmkdir("$destdir/$workdir") or do { logger("ERROR", "unable to create directory '$destdir/$workdir'"); $errcnt++; return 0; }; } foreach (sort @dirlist) { # skip dot-directories if (/^[.]+.*$/) { next; } if (-f "$srcdir/$workdir/$_") { $errcnt += convert_file("$srcdir/$workdir", "$_", "$destdir/$workdir"); } } if ($errcnt) { logger('INFO', "There were $errcnt errors converting '$workdir'!"); } else { logger('INFO', "Finished converting $workdir!"); } return 1; } # # convert_dir($from_dir) # # Convert directory "from_dir" and its subdirectories # sub convert_dir { my ($srcdir, $workdir, $destdir) = @_; logger('INFO', "** Converting Scans **"); if (-d "$srcdir/$workdir") { # it's a dirrectory logger('INFO', "Starting in directory '$srcdir/$workdir'"); walk_convert_dir($srcdir, $workdir, $destdir); # touch source directory so digilib rescans the thumbnails #logger('DEBUG', "/usr/bin/touch $source_base_dirs[0]/$workdir"); system("/usr/bin/touch '$srcdir/$workdir'"); } elsif (-f _) { # it's a file logger('INFO', "Converting file '$srcdir/$workdir'"); convert_file($srcdir, $workdir, $destdir); # touch source parent directory so digilib rescans the thumbnails my $pdir = "$srcdir/$workdir"; # chop off after the last slash $pdir =~ s/\/[^\/]+$/\//; system("/usr/bin/touch '$pdir'"); } logger('DONE', "** Finished converting scans **"); return 1; } ############################################################### ## Main if ($#ARGV < 3) { print "Scale-O-Mat $version\n"; print " use: scaleomat.pl -src=src-base -dest=dest-base -dir=workdir [...]\n"; print " reads from scr-base/workdir and writes to dest-base/workdir\n"; print " -scaleto=destination size (S or WxH)\n"; print " -scaleby=magnification factor.\n"; print " -jpegqual=JPEG quality (0-100)\n"; print " -replace=yes replaces existing files (default=skip).\n"; print " -encoder=tiff_png|tiff_jpeg|tiff_jpeg2\n"; exit 1; } # test software installation checksoft(); # read command line parameters my $args = parseargs(); # source dir my $srcdir = cleanpath($$args{'src'}, 1); # destination dir my $destdir = cleanpath($$args{'dest'}, 1); # working dir my $workdir = cleanpath($$args{'dir'}); # destination size if ($$args{'scaleby'}) { $scale_relative = 1; $scale_w = $$args{'scaleby'}; logger('INFO', "Scaling relative by factor $scale_w"); } if ($$args{'scaleto'}) { $scale_relative = 0; if ($$args{'scaleto'} =~ /(\d+)x(\d+)/) { $scale_w = $1; $scale_h = $2; } else { $scale_w = $$args{'scaleto'}; $scale_h = $$args{'scaleto'}; } logger('INFO', "Scaling absolute to size $scale_w x $scale_h"); } # JPEG quality if ($$args{'jpegqual'}) { logger('INFO', "JPEG quality set to '$$args{'jpegqual'}'!"); $jpeg_quality = $$args{'jpegqual'}; } # force encoder if ($$args{'encoder'}) { logger('INFO', "Using encoder '$$args{'encoder'}'!"); $use_encoder = $$args{'encoder'}; } # Overwrite if ($$args{'replace'}) { logger('INFO', "Set to overwrite existing files!"); $overwrite = 1; } # debug if ($$args{'debug'}) { logger('INFO', "Set debug level to $$args{'debug'}!"); $debug = $$args{'debug'}; } # simulate if ($$args{'simulate'}) { logger('INFO', "Set to simulate operation only ($$args{'simulate'})!"); $simulate = $$args{'simulate'}; } convert_dir($srcdir, $workdir, $destdir);