#!/usr/bin/perl -w

use strict;
use warnings;
use utf8;
use open qw(:std :utf8);

# Filter_2_04_check_unknown_characters.pl
# 2010-04-14
# Wolfgang Schmidle
#
# This script is used to create the "unknown" block.
#
# input:  the text, read from the files named on the command line or,
#         if there are none, from STDIN
# output: a report written to script-output.txt on the Desktop of the
#         user named in $username. For every three-digit code <NNN> in the
#         text it lists the first occurrence (with one line of context on
#         either side) and then a suggested "unknown:" block with the
#         number of occurrences of each code.
#
# The script always ends with die "That's all.\n", so nothing is printed
# to STDOUT and the exit status is non-zero.

# text input
my @text;
while ( my $input_line = <> ) {
    push @text, $input_line;
}

# read default parameters
# The settings script is expected to print " aux=DIR " and any number of
# " unknown=FILE" entries; each FILE (relative to DIR) is queued on @ARGV.
my $filter = "mpdl-workflow-settings.pl";
my $result = `perl $filter`;
$result = "" unless defined $result;    # backticks yield undef if perl could not be run
my $aux = ( $result =~ m! aux=([^ ]+) ! ? $1 : "" );
while ( $result =~ m! unknown=([^ ]+)!g ) {
    push @ARGV, "$aux/$1";
}

# read the "parameters" block
#
# NOTE(review): in the copy this script was restored from, a span was lost
# at this point. It began inside the pattern of a `last if m!...!` at the
# top of a `foreach (@text)` loop and ran up to the `) {` that opens the
# loop below. The lost code (which presumably used the otherwise unused
# $inText and $inParameters flags) could not be recovered and is NOT
# reproduced here. The loop head is reconstructed as `while (<>)`, which is
# what the `push @ARGV` above and the `chomp` below imply -- TODO confirm
# against the original script.
my %unknown = ();

# Only read when files were queued: with an empty @ARGV a second <> would
# fall back to reading STDIN again.
if (@ARGV) {
    while (<>) {
        chomp;
        s!#.*$!!;    # remove comments
        s! +$!!;     # remove trailing spaces
        next if m!^$!;

        # "<NNN> see <MMM> rest": reuse the replacement already known for <MMM>
        if (m!<(\d\d\d)> +see +<(\d\d\d)>(.*)!) {
            my ( $code, $target, $rest ) = ( $1, $2, $3 );
            unless ( exists $unknown{$target} ) {
                die "invalid reference:\n$_\n";
            }
            $unknown{$code} = "$unknown{$target} $rest";
            next;
        }

        # "<NNN> = replacement"
        if (m!<(\d\d\d)> *= *(.+)!) {
            $unknown{$1} = $2;
        }
    }
}

# go through the text
my %number;    # code => number of occurrences
my %line;      # code => description of the first occurrence
my $pb = 0;    # number of lines with a page break seen so far

for my $index ( 0 .. $#text ) {
    my $current     = $text[$index];
    my $line_number = $index + 1;

    # NOTE(review): both patterns below had lost their beginning (only
    # `]*n="(\d+)"` and `]` were left); they are restored on the assumption
    # that the missing part was a `<pb` tag match -- TODO confirm.
    # if (m!<pb[^>]*n="(\d+)"!) { $pb = $1; }
    if ( $current =~ m!<pb[ >]! ) { $pb++; }

    while ( $current =~ m!<(\d\d\d)>!g ) {
        my $code = $1;
        next if $number{$code}++;    # only the first occurrence is described

        # One line of context on each side; at the edges of the text there
        # is no neighbour, so use an empty string instead of wrapping
        # around to the other end or interpolating undef.
        my $before = $index > 0      ? $text[ $index - 1 ] : "";
        my $after  = $index < $#text ? $text[ $index + 1 ] : "";

        my $context = "$before$current$after";
        $context .= "\n" unless $context =~ m!\n\z!;    # last line may lack a newline

        $line{$code} =
          "<$code>: first occurrence in line $line_number (page $pb):\n$context";
        if ( defined $unknown{$code} ) {
            $line{$code} .= "suggested replacement: $unknown{$code}\n";
        }
    }
}

# NOTE(review): the output location is tied to one user's Desktop.
my $username = "kthoden";
my $output   = "";

# print first occurrences
foreach my $code ( sort keys %line ) {
    $output .= "\n$line{$code}\n";
}

# print a suggestion for the "unknown" block
$output .= "unknown:\n";
foreach my $code ( sort keys %line ) {
    my $times = ( $number{$code} == 1 ? "time" : "times" );

    # Codes without a known replacement get an empty right-hand side.
    my $replacement = defined $unknown{$code} ? $unknown{$code} : "";
    $output .= "<$code> = $replacement ($number{$code} $times)\n";
}
$output .= "\n";

my $output_file = "/Users/$username/Desktop/script-output.txt";
open( my $out, '>', $output_file )
  or die "Can't write the raw text! ($output_file: $!)\n";
print {$out} $output;
close($out)
  or die "Can't write the raw text! ($output_file: $!)\n";

# Always ends via die; this looks deliberate (the report goes to the file,
# nothing to STDOUT) -- presumably so that a calling tool leaves the text
# untouched; verify against the caller.
die "That's all.\n";