X-Git-Url: http://koha-dev.rot13.org:8081/gitweb/?a=blobdiff_plain;f=misc%2Ftranslator%2Ftmpl_process3.pl;h=2a9643d2820f113102c2c73b5662a6116cf1fff0;hb=9d6d641d1f8b77271800f43bc027b651f9aea52b;hp=89c993f672ad230de0aebbdb692e82b62fd772fb;hpb=ef038b258ebfef315cea06bcf27d92eada86e9d7;p=srvgit diff --git a/misc/translator/tmpl_process3.pl b/misc/translator/tmpl_process3.pl index 89c993f672..2a9643d282 100755 --- a/misc/translator/tmpl_process3.pl +++ b/misc/translator/tmpl_process3.pl @@ -4,6 +4,9 @@ # Parts copyright 2003-2004 Jerome Vizcaino # Parts copyright 2004 Ambrose Li +use FindBin; +use lib $FindBin::Bin; + =head1 NAME tmpl_process3.pl - Alternative version of tmpl_process.pl @@ -13,15 +16,15 @@ using gettext-compatible translation files use strict; #use warnings; FIXME - Bug 2505 -use Getopt::Long; +use File::Basename qw( fileparse ); +use Getopt::Long qw( GetOptions ); use Locale::PO; -use File::Temp qw( :POSIX ); use TmplTokenizer; use VerboseWarnings qw( :warn :die ); ############################################################################### -use vars qw( @in_files $in_dir $str_file $out_dir $quiet ); +use vars qw( @in_dirs @filenames @match @nomatch $str_file $out_dir $quiet ); use vars qw( @excludes $exclude_regex ); use vars qw( $recursive_p ); use vars qw( $pedantic_p ); @@ -31,7 +34,7 @@ use vars qw( $charset_in $charset_out ); ############################################################################### -sub find_translation ($) { +sub find_translation { my($s) = @_; my $key = $s; if ($s =~ /\S/s) { @@ -52,27 +55,43 @@ sub find_translation ($) { } } -sub text_replace_tag ($$) { +sub text_replace_tag { my($t, $attr) = @_; my $it; + my @ttvar; # value [tag=input], meta - my $tag = lc($1) if $t =~ /^<(\S+)/s; + my $tag = ($t =~ /^<(\S+)/s) ? lc($1) : undef; my $translated_p = 0; - for my $a ('alt', 'content', 'title', 'value','label') { + for my $a ('alt', 'content', 'title', 'value', 'label', 'placeholder', 'aria-label') { if ($attr->{$a}) { next if $a eq 'label' && $tag ne 'optgroup'; next if $a eq 'content' && $tag ne 'meta'; - next if $a eq 'value' && ($tag ne 'input' || (ref $attr->{'type'} && $attr->{'type'}->[1] =~ /^(?:checkbox|hidden|radio|text)$/)); # FIXME + next if $a eq 'value' && ($tag ne 'input' || (ref $attr->{'type'} && $attr->{'type'}->[1] =~ /^(?:checkbox|hidden|radio)$/)); # FIXME my($key, $val, $val_orig, $order) = @{$attr->{$a}}; #FIXME if ($val =~ /\S/s) { - my $s = find_translation($val); - if ($attr->{$a}->[1] ne $s) { #FIXME - $attr->{$a}->[1] = $s; # FIXME - $attr->{$a}->[2] = ($s =~ /"/s)? "'$s'": "\"$s\""; #FIXME - $translated_p = 1; - } + # for selected attributes replace '[%..%]' with '%s' and remember matches + if ( $a =~ /title|value|alt|content|placeholder|aria-label/ ) { + while ( $val =~ s/(\[\%.*?\%\])/\%s/ ) { + my $var = $1; + push @ttvar, $1; + } + } + # find translation for transformed attributes + my $s = find_translation($val); + # replace '%s' with original content (in order) on translated string, this is fragile! + if ( $a =~ /title|value|alt|content|placeholder|aria-label/ and @ttvar ) { + while ( @ttvar ) { + my $var = shift @ttvar; + $s =~ s/\%s/$var/; + } + } + if ($attr->{$a}->[1] ne $s) { #FIXME + $attr->{$a}->[1] = $s; # FIXME + $attr->{$a}->[2] = ($s =~ /"/s)? "'$s'": "\"$s\""; #FIXME + $translated_p = 1; + } } } } @@ -87,6 +106,7 @@ sub text_replace_tag ($$) { } sort { $attr->{$a}->[3] <=> $attr->{$b}->[3] #FIXME + || $a cmp $b # Sort attributes BZ 22236 } keys %$attr); $it .= '>'; } @@ -96,10 +116,10 @@ sub text_replace_tag ($$) { return $it; } -sub text_replace (**) { +sub text_replace { my($h, $output) = @_; for (;;) { - my $s = TmplTokenizer::next_token $h; + my $s = TmplTokenizer::next_token($h); last unless defined $s; my($kind, $t, $attr) = ($s->type, $s->string, $s->attributes); if ($kind eq C4::TmplTokenType::TEXT) { @@ -117,7 +137,7 @@ sub text_replace (**) { for my $t (@{$s->js_data}) { # FIXME for this whole block if ($t->[0]) { - printf $output "%s%s%s", $t->[2], find_translation $t->[3], + printf $output "%s%s%s", $t->[2], find_translation($t->[3]), $t->[2]; } else { print $output $t->[1]; @@ -131,77 +151,90 @@ sub text_replace (**) { } } -sub listfiles ($$$) { +sub listfiles { my($dir, $type, $action) = @_; + my $filenames = join ('|', @filenames); # used to update strings from this file + my $match = join ('|', @match); # use only this files + my $nomatch = join ('|', @nomatch); # do no use this files my @it = (); if (opendir(DIR, $dir)) { - my @dirent = readdir DIR; # because DIR is shared when recursing - closedir DIR; - for my $dirent (@dirent) { - my $path = "$dir/$dirent"; - if ($dirent =~ /^\./ || $dirent eq 'CVS' || $dirent eq 'RCS' - || (defined $exclude_regex && $dirent =~ /^(?:$exclude_regex)$/)) { - ; - } elsif (-f $path) { - push @it, $path if (!defined $type || $dirent =~ /\.(?:$type)$/) || $action eq 'install'; - } elsif (-d $path && $recursive_p) { - push @it, listfiles($path, $type, $action); + my @dirent = readdir DIR; # because DIR is shared when recursing + closedir DIR; + for my $dirent (@dirent) { + my $path = "$dir/$dirent"; + if ($dirent =~ /^\./ || $dirent eq 'CVS' || $dirent eq 'RCS' + || (defined $exclude_regex && $dirent =~ /^(?:$exclude_regex)$/)) { + ; + } elsif (-f $path) { + my $basename = fileparse( $path ); + push @it, $path + if ( not @filenames or $basename =~ /($filenames)/i ) + and ( not @match or $basename =~ /($match)/i ) # files to include + and ( not @nomatch or $basename !~ /($nomatch)/i ) # files not to include + and (!defined $type || $dirent =~ /\.(?:$type)$/) || $action eq 'install'; + } elsif (-d $path && $recursive_p) { + push @it, listfiles($path, $type, $action); + } } - } } else { - warn_normal "$dir: $!", undef; + warn_normal("$dir: $!", undef); } return @it; } ############################################################################### -sub mkdir_recursive ($) { +sub mkdir_recursive { my($dir) = @_; local($`, $&, $', $1); $dir = $` if $dir ne /^\/+$/ && $dir =~ /\/+$/; my ($prefix, $basename) = ($dir =~ /\/([^\/]+)$/s)? ($`, $1): ('.', $dir); mkdir_recursive($prefix) if $prefix ne '.' && !-d $prefix; if (!-d $dir) { - print STDERR "Making directory $dir..." unless $quiet; + print STDERR "Making directory $dir...\n" unless $quiet; # creates with rwxrwxr-x permissions - mkdir($dir, 0775) || warn_normal "$dir: $!", undef; + mkdir($dir, 0775) || warn_normal("$dir: $!", undef); } } ############################################################################### -sub usage ($) { +sub usage { my($exitcode) = @_; my $h = $exitcode? *STDERR: *STDOUT; print $h < \@in_files, + 'input|i=s' => \@in_dirs, + 'filename|f=s' => \@filenames, + 'match|m=s' => \@match, + 'nomatch|n=s' => \@nomatch, 'outputdir|o=s' => \$out_dir, 'recursive|r' => \$recursive_p, 'str-file|s=s' => \$str_file, @@ -220,57 +256,48 @@ GetOptions( 'quiet|q' => \$quiet, 'pedantic-warnings|pedantic' => sub { $pedantic_p = 1 }, 'help' => \&usage, -) || usage_error; +) || usage_error(); -VerboseWarnings::set_application_name $0; -VerboseWarnings::set_pedantic_mode $pedantic_p; - -# keep the buggy Locale::PO quiet if it says stupid things -$SIG{__WARN__} = sub { - my($s) = @_; - print STDERR $s unless $s =~ /^Strange line in [^:]+: #~/s - }; +VerboseWarnings::set_application_name($0); +VerboseWarnings::set_pedantic_mode($pedantic_p); my $action = shift or usage_error('You must specify an ACTION.'); usage_error('You must at least specify input and string list filenames.') - if !@in_files || !defined $str_file; + if !@in_dirs || !defined $str_file; # Type match defaults to *.tt plus *.inc if not specified -$type = "tt|inc|xsl" if !defined($type); +$type = "tt|inc|xsl|xml|def" if !defined($type); -# Check the inputs for being files or directories -for my $input (@in_files) { - usage_error("$input: Input must be a file or directory.\n" +# Check the inputs for being directories +for my $in_dir ( @in_dirs ) { + usage_error("$in_dir: Input must be a directory.\n" . "(Symbolic links are not supported at the moment)") - unless -d $input || -f $input;; + unless -d $in_dir; } # Generates the global exclude regular expression $exclude_regex = '(?:'.join('|', @excludes).')' if @excludes; +my @in_files; # Generate the list of input files if a directory is specified -if (-d $in_files[0]) { - die "If you specify a directory as input, you must specify only it.\n" - if @in_files > 1; +# input is a directory, generates list of files to process - # input is a directory, generates list of files to process - $in_dir = $in_files[0]; - $in_dir =~ s/\/$//; # strips the trailing / if any - @in_files = listfiles($in_dir, $type, $action); -} else { - for my $input (@in_files) { +for my $fn ( @filenames ) { die "You cannot specify input files and directories at the same time.\n" - unless -f $input; - } + if -d $fn; +} +for my $in_dir ( @in_dirs ) { + $in_dir =~ s/\/$//; # strips the trailing / if any + @in_files = ( @in_files, listfiles($in_dir, $type, $action)); } # restores the string list from file -$href = Locale::PO->load_file_ashash($str_file); +$href = Locale::PO->load_file_ashash($str_file, 'utf-8'); # guess the charsets. HTML::Templates defaults to iso-8859-1 if (defined $href) { die "$str_file: PO file is corrupted, or not a PO file\n" unless defined $href->{'""'}; - $charset_out = TmplTokenizer::charset_canon $2 if $href->{'""'}->msgstr =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/; + $charset_out = TmplTokenizer::charset_canon($2) if $href->{'""'}->msgstr =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/; $charset_in = $charset_out; # for my $msgid (keys %$href) { # if ($msgid =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/) { @@ -289,105 +316,37 @@ if (defined $href) { next if $id_count == $str_count || $msg->{msgstr} eq '""' || grep { /fuzzy/ } @{$msg->{_flags}}; - warn_normal + warn_normal( "unconsistent %s count: ($id_count/$str_count):\n" . " line: " . $msg->{loaded_line_number} . "\n" . " msgid: " . $msg->{msgid} . "\n" . - " msgstr: " . $msg->{msgstr} . "\n", undef; + " msgstr: " . $msg->{msgstr} . "\n", undef); } } # set our charset in to UTF-8 if (!defined $charset_in) { - $charset_in = TmplTokenizer::charset_canon 'UTF-8'; - warn "Warning: Can't determine original templates' charset, defaulting to $charset_in\n"; + $charset_in = TmplTokenizer::charset_canon('UTF-8'); + warn "Warning: Can't determine original templates' charset, defaulting to $charset_in\n" unless ( $quiet ); } # set our charset out to UTF-8 if (!defined $charset_out) { - $charset_out = TmplTokenizer::charset_canon 'UTF-8'; - warn "Warning: Charset Out defaulting to $charset_out\n"; + $charset_out = TmplTokenizer::charset_canon('UTF-8'); + warn "Warning: Charset Out defaulting to $charset_out\n" unless ( $quiet ); } -my $xgettext = './xgettext.pl'; # actual text extractor script my $st; -if ($action eq 'create') { - # updates the list. As the list is empty, every entry will be added - if (!-s $str_file) { - warn "Removing empty file $str_file\n"; - unlink $str_file || die "$str_file: $!\n"; - } - die "$str_file: Output file already exists\n" if -f $str_file; - my($tmph1, $tmpfile1) = tmpnam(); - my($tmph2, $tmpfile2) = tmpnam(); - close $tmph2; # We just want a name - # Generate the temporary file that acts as /POTFILES.in - for my $input (@in_files) { - print $tmph1 "$input\n"; - } - close $tmph1; - warn "I $charset_in O $charset_out"; - # Generate the specified po file ($str_file) - $st = system ($xgettext, '-s', '-f', $tmpfile1, '-o', $tmpfile2, - (defined $charset_in? ('-I', $charset_in): ()), - (defined $charset_out? ('-O', $charset_out): ()) - ); - # Run msgmerge so that the pot file looks like a real pot file - # We need to help msgmerge a bit by pre-creating a dummy po file that has - # the headers and the "" msgid & msgstr. It will fill in the rest. - if ($st == 0) { - # Merge the temporary "pot file" with the specified po file ($str_file) - # FIXME: msgmerge(1) is a Unix dependency - # FIXME: need to check the return value - unless (-f $str_file) { - local(*INPUT, *OUTPUT); - open(INPUT, "<$tmpfile2"); - open(OUTPUT, ">$str_file"); - while () { - print OUTPUT; - last if /^\n/s; - } - close INPUT; - close OUTPUT; - } - $st = system("msgmerge -U ".($quiet?'-q':'')." -s $str_file $tmpfile2"); - } else { - error_normal "Text extraction failed: $xgettext: $!\n", undef; - error_additional "Will not run msgmerge\n", undef; - } -# unlink $tmpfile1 || warn_normal "$tmpfile1: unlink failed: $!\n", undef; -# unlink $tmpfile2 || warn_normal "$tmpfile2: unlink failed: $!\n", undef; - -} elsif ($action eq 'update') { - my($tmph1, $tmpfile1) = tmpnam(); - my($tmph2, $tmpfile2) = tmpnam(); - close $tmph2; # We just want a name - # Generate the temporary file that acts as /POTFILES.in - for my $input (@in_files) { - print $tmph1 "$input\n"; - } - close $tmph1; - # Generate the temporary file that acts as /.pot - $st = system($xgettext, '-s', '-f', $tmpfile1, '-o', $tmpfile2, - '--po-mode', - (defined $charset_in? ('-I', $charset_in): ()), - (defined $charset_out? ('-O', $charset_out): ())); - if ($st == 0) { - # Merge the temporary "pot file" with the specified po file ($str_file) - # FIXME: msgmerge(1) is a Unix dependency - # FIXME: need to check the return value - $st = system("msgmerge -U ".($quiet?'-q':'')." -s $str_file $tmpfile2"); - } else { - error_normal "Text extraction failed: $xgettext: $!\n", undef; - error_additional "Will not run msgmerge\n", undef; - } -# unlink $tmpfile1 || warn_normal "$tmpfile1: unlink failed: $!\n", undef; -# unlink $tmpfile2 || warn_normal "$tmpfile2: unlink failed: $!\n", undef; - -} elsif ($action eq 'install') { +if ($action eq 'install') { if(!defined($out_dir)) { usage_error("You must specify an output directory when using the install method."); } + if ( scalar @in_dirs > 1 ) { + usage_error("You must specify only one input directory when using the install method."); + } + + my $in_dir = shift @in_dirs; + if ($in_dir eq $out_dir) { warn "You must specify a different input and output directory.\n"; exit -1; @@ -398,30 +357,28 @@ if ($action eq 'create') { -d $out_dir || die "$out_dir: The directory does not exist\n"; # Try to open the file, because Locale::PO doesn't check :-/ - open(INPUT, "<$str_file") || die "$str_file: $!\n"; - close INPUT; + open(my $fh, '<', $str_file) || die "$str_file: $!\n"; + close $fh; # creates the new tmpl file using the new translation for my $input (@in_files) { die "Assertion failed" unless substr($input, 0, length($in_dir) + 1) eq "$in_dir/"; -# print "$input / $type\n"; + + my $target = $out_dir . substr($input, length($in_dir)); + my $targetdir = ($target =~ /[^\/]+$/s) ? $` : undef; + if (!defined $type || $input =~ /\.(?:$type)$/) { my $h = TmplTokenizer->new( $input ); $h->set_allow_cformat( 1 ); - VerboseWarnings::set_input_file_name $input; - - my $target = $out_dir . substr($input, length($in_dir)); - my $targetdir = $` if $target =~ /[^\/]+$/s; + VerboseWarnings::set_input_file_name($input); mkdir_recursive($targetdir) unless -d $targetdir; print STDERR "Creating $target...\n" unless $quiet; - open( OUTPUT, ">$target" ) || die "$target: $!\n"; - text_replace( $h, *OUTPUT ); - close OUTPUT; + open( my $fh, ">:encoding(UTF-8)", "$target" ) || die "$target: $!\n"; + text_replace( $h, $fh ); + close $fh; } else { # just copying the file - my $target = $out_dir . substr($input, length($in_dir)); - my $targetdir = $` if $target =~ /[^\/]+$/s; mkdir_recursive($targetdir) unless -d $targetdir; system("cp -f $input $target"); print STDERR "Copying $input...\n" unless $quiet; @@ -507,14 +464,6 @@ translation, it can be suppressed with the %0.0s notation. Using the PO format also means translators can add their own comments in the translation files, if necessary. -=item - - -Create, update, and install actions are all based on the -same scanner module. This ensures that update and install -have the same idea of what is a translatable string; -attribute names in tags, for example, will not be -accidentally translated. - =back =head1 NOTES @@ -522,22 +471,8 @@ accidentally translated. Anchors are represented by an > notation. The meaning of this non-standard notation might not be obvious. -The create action calls xgettext.pl to do the actual work; -the update action calls xgettext.pl and msgmerge(1) to do the -actual work. - =head1 BUGS -xgettext.pl must be present in the current directory; the -msgmerge(1) command must also be present in the search path. -The script currently does not check carefully whether these -dependent commands are present. - -Locale::PO(3) has a lot of bugs. It can neither parse nor -generate GNU PO files properly; a couple of workarounds have -been written in TmplTokenizer and more is likely to be needed -(e.g., to get rid of the "Strange line" warning for #~). - This script may not work in Windows. There are probably some other bugs too, since this has not been @@ -545,12 +480,7 @@ tested very much. =head1 SEE ALSO -xgettext.pl, TmplTokenizer.pm, -msgmerge(1), Locale::PO(3), -translator_doc.txt - -http://www.saas.nsw.edu.au/koha_wiki/index.php?page=DifficultTerms =cut