Bug 17600: Standardize our EXPORT_OK

[srvgit] / misc / translator / xgettext.pl
diff --git a/misc/translator/xgettext.pl b/misc/translator/xgettext.pl

index 8d55547..fcfe1bd 100755 (executable)
--- a/misc/translator/xgettext.pl
+++ b/misc/translator/xgettext.pl
@@ -1,15 +1,33 @@
  #!/usr/bin/perl
  
+# This file is part of Koha.
+#
+# Koha is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# Koha is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Koha; if not, see <http://www.gnu.org/licenses>.
+
  =head1 NAME
  
  xgettext.pl - xgettext(1)-like interface for .tt strings extraction
  
  =cut
  
+use FindBin;
+use lib $FindBin::Bin;
+
  use strict;
  use warnings;
-use Getopt::Long;
-use POSIX;
+use Getopt::Long qw( GetOptions );
+use POSIX qw( close exit localtime open printf time );
  use Locale::PO;
  use TmplTokenizer;
  use VerboseWarnings;
@@ -41,6 +59,7 @@ sub string_negligible_p {
             || $t =~ /^[A-Za-z]$/               # single letters
              || $t =~ /^(&[a-z]+;|&#\d+;|&#x[0-9a-fA-F]+;|%%|%s|\s|[[:punct:]])*$/ # html entities,placeholder,punct, ...
          || ( $t =~ /^\[\%.*\%\]$/ and $t !~ /\%\].*\[\%/ )    # pure TT entities
+        || $t =~ /^\s*<\?.*\?>/                               # ignore xml prolog
         )
  }
  
@@ -98,7 +117,7 @@ sub string_list {
  sub text_extract {
      my($h) = @_;
      for (;;) {
-        my $s = TmplTokenizer::next_token $h;
+        my $s = TmplTokenizer::next_token($h);
          last unless defined $s;
          my($kind, $t, $attr) = ($s->type, $s->string, $s->attributes);
          if ($kind eq C4::TmplTokenType::TEXT) {
@@ -113,14 +132,19 @@ sub text_extract {
              # value [tag=input], meta
              my $tag;
              $tag = lc($1) if $t =~ /^<(\S+)/s;
-            for my $a ('alt', 'content', 'title', 'value', 'label', 'placeholder') {
+            for my $a ('alt', 'content', 'title', 'value', 'label', 'placeholder', 'aria-label') {
                  if ($attr->{$a}) {
                      next if $a eq 'label' && $tag ne 'optgroup';
                      next if $a eq 'content' && $tag ne 'meta';
                      next if $a eq 'value' && ($tag ne 'input'
                          || (ref $attr->{'type'} && $attr->{'type'}->[1] =~ /^(?:hidden|radio|checkbox)$/)); # FIXME
                      my($key, $val, $val_orig, $order) = @{$attr->{$a}}; #FIXME
-                    $val = TmplTokenizer::trim $val;
+                    $val = TmplTokenizer::trim($val);
+                    # for selected attributes replace '[%..%]' with '%s' globally
+                    if ( $a =~ /title|value|alt|content|placeholder|aria-label/ ) {
+                        $val =~ s/\[\%.*?\%\]/\%s/g;
+                    }
+                    # save attribute text for translation
                      remember( $s, $val ) if $val =~ /\S/s;
                  }
              }
@@ -146,7 +170,7 @@ sub generate_strings_list {
  sub generate_po_file {
      # We don't emit the Plural-Forms header; it's meaningless for us
      my $pot_charset = (defined $charset_out? $charset_out: 'CHARSET');
-    $pot_charset = TmplTokenizer::charset_canon $pot_charset;
+    $pot_charset = TmplTokenizer::charset_canon($pot_charset);
      # Time stamps aren't exactly right semantically. I don't know how to fix it.
      my $time = POSIX::strftime('%Y-%m-%d %H:%M%z', localtime(time));
      my $time_pot = $time;
@@ -164,7 +188,7 @@ EOF
      print $OUTPUT <<EOF;
  msgid ""
  msgstr ""
-"Project-Id-Version: PACKAGE VERSION\\n"
+"Project-Id-Version: Koha\\n"
  "POT-Creation-Date: $time_pot\\n"
  "PO-Revision-Date: $time_po\\n"
  "Last-Translator: FULL NAME <EMAIL\@ADDRESS>\\n"
@@ -235,9 +259,11 @@ EOF
             $cformat_p = 1 if $token->type == C4::TmplTokenType::TEXT_PARAMETRIZED;
         }
          printf $OUTPUT "#, c-format\n" if $cformat_p;
-        printf $OUTPUT "msgid %s\n", TmplTokenizer::quote_po
-               TmplTokenizer::string_canon
-               TmplTokenizer::charset_convert $t, $charset_in, $charset_out;
+        printf $OUTPUT "msgid %s\n", TmplTokenizer::quote_po(
+            TmplTokenizer::string_canon(
+                TmplTokenizer::charset_convert($t, $charset_in, $charset_out)
+            )
+        );
          printf $OUTPUT "msgstr %s\n\n", (defined $translation{$t}?
                 TmplTokenizer::quote_po( $translation{$t} ): "\"\"");
      }
@@ -246,8 +272,8 @@ EOF
  ###############################################################################
  
  sub convert_translation_file {
-    open(my $INPUT, '<', $convert_from) || die "$convert_from: $!\n";
-    VerboseWarnings::set_input_file_name $convert_from;
+    open(my $INPUT, '<:encoding(utf-8)', $convert_from) || die "$convert_from: $!\n";
+    VerboseWarnings::set_input_file_name($convert_from);
      while (<$INPUT>) {
         chomp;
         my($msgid, $msgstr) = split(/\t/);
@@ -264,13 +290,13 @@ sub convert_translation_file {
         $translation{$msgid} = $msgstr unless $msgstr eq '*****';
  
         if ($msgid  =~ /\bcharset=(["']?)([^;\s"']+)\1/s) {
-           my $candidate = TmplTokenizer::charset_canon $2;
+        my $candidate = TmplTokenizer::charset_canon($2);
             die "Conflicting charsets in msgid: $candidate vs $charset_in\n"
                     if defined $charset_in && $charset_in ne $candidate;
             $charset_in = $candidate;
         }
         if ($msgstr =~ /\bcharset=(["']?)([^;\s"']+)\1/s) {
-           my $candidate = TmplTokenizer::charset_canon $2;
+        my $candidate = TmplTokenizer::charset_canon($2);
             die "Conflicting charsets in msgid: $candidate vs $charset_out\n"
                     if defined $charset_out && $charset_out ne $candidate;
             $charset_out = $candidate;
@@ -278,8 +304,8 @@ sub convert_translation_file {
      }
      # The following assumption is correct; that's what HTML::Template assumes
      if (!defined $charset_in) {
-       $charset_in = $charset_out = TmplTokenizer::charset_canon 'utf-8';
-       warn "Warning: Can't determine original templates' charset, defaulting to $charset_in\n";
+        $charset_in = $charset_out = TmplTokenizer::charset_canon('utf-8');
+        warn "Warning: Can't determine original templates' charset, defaulting to $charset_in\n";
      }
  }
  
@@ -346,8 +372,8 @@ GetOptions(
      'help'                             => sub { usage(0) },
  ) || usage_error;
  
-VerboseWarnings::set_application_name $0;
-VerboseWarnings::set_pedantic_mode $pedantic_p;
+VerboseWarnings::set_application_name($0);
+VerboseWarnings::set_pedantic_mode($pedantic_p);
  
  usage_error('Missing mandatory option -f')
         unless defined $files_from || defined $convert_from;
@@ -358,7 +384,7 @@ usage_error('You cannot specify both --convert-from and --files-from')
  
  if (defined $output && $output ne '-') {
      print STDERR "$0: Opening output file \"$output\"\n" if $verbose_p;
-        open($OUTPUT, '>', $output) || die "$output: $!\n";
+    open($OUTPUT, '>:encoding(utf-8)', $output) || die "$output: $!\n";
  } else {
      print STDERR "$0: Outputting to STDOUT...\n" if $verbose_p;
      open($OUTPUT, ">&STDOUT");
@@ -366,13 +392,13 @@ if (defined $output && $output ne '-') {
  
  if (defined $files_from) {
      print STDERR "$0: Opening input file list \"$files_from\"\n" if $verbose_p;
-    open(my $INPUT, '<', $files_from) || die "$files_from: $!\n";
+    open(my $INPUT, '<:encoding(utf-8)', $files_from) || die "$files_from: $!\n";
      while (<$INPUT>) {
         chomp;
         my $input = /^\//? $_: "$directory/$_";
         my $h = TmplTokenizer->new( $input );
         $h->set_allow_cformat( 1 );
-       VerboseWarnings::set_input_file_name $input;
+    VerboseWarnings::set_input_file_name($input);
         print STDERR "$0: Processing file \"$input\"\n" if $verbose_p;
         text_extract( $h );
      }
@@ -393,8 +419,7 @@ exit(-1) if TmplTokenizer::fatal_p;
  
  =head1 DESCRIPTION
  
-This is an experimental script based on the modularized
-text-extract2.pl script.  It has behaviour similar to
+This script has behaviour similar to
  xgettext(1), and generates gettext-compatible output files.
  
  A gettext-like format provides the following advantages: