Bug 17600: Standardize our EXPORT_OK

[srvgit] / misc / translator / TmplTokenizer.pm
diff --git a/misc/translator/TmplTokenizer.pm b/misc/translator/TmplTokenizer.pm

index d54b7ca..05cc54e 100644 (file)
--- a/misc/translator/TmplTokenizer.pm
+++ b/misc/translator/TmplTokenizer.pm
@@ -1,15 +1,12 @@
  package TmplTokenizer;
  
-use strict;
-#use warnings; FIXME - Bug 2505
-use TmplTokenType;
-use TmplToken;
-use TTParser;
+use Modern::Perl;
+use C4::TmplTokenType;
+use C4::TmplToken;
+use C4::TTParser;
  use VerboseWarnings qw( pedantic_p error_normal warn_normal warn_pedantic );
  require Exporter;
  
-use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
-
  ###############################################################################
  
  =head1 NAME
@@ -24,10 +21,6 @@ A wrapper for the functionality found in TTParser to allow an easier transition
  
  ###############################################################################
  
-$VERSION = 0.02;
-
-@ISA = qw(Exporter);
-@EXPORT_OK = qw();
  
  use vars qw( $pedantic_attribute_error_in_nonpedantic_mode_p );
  use vars qw( $pedantic_tmpl_var_use_in_nonpedantic_mode_p );
@@ -68,7 +61,7 @@ sub new {
      shift;
      my ($filename) = @_;
      #open my $handle,$filename or die "can't open $filename";
-    my $parser = TTParser->new;
+    my $parser = C4::TTParser->new;
      $parser->build_tokens( $filename );
      bless {
        filename => $filename,
@@ -123,7 +116,7 @@ sub _set_js_mode {
      return $this;
  }
  
-#used in xgettext, tmpl_process3 and text-extract2
+#used in xgettext, tmpl_process3
  sub set_allow_cformat {
      my $this = shift;
      $this->{+ALLOW_CFORMAT_P} = $_[0];
@@ -140,7 +133,7 @@ BEGIN {
  sub parenleft  () { '(' }
  sub parenright () { ')' }
  
-sub _split_js ($) {
+sub _split_js {
      my ($s0) = @_;
      my @it = ();
      while (length $s0) {
@@ -192,12 +185,13 @@ sub STATE_STRING_LITERAL () { 3 }
  
  # XXX This is a crazy hack. I don't want to write an ECMAScript parser.
  # XXX A scanner is one thing; a parser another thing.
-sub _identify_js_translatables (@) {
+sub _identify_js_translatables {
      my @input = @_;
      my @output = ();
      # We mark a JavaScript translatable string as in C, i.e., _("literal")
      # For simplicity, we ONLY look for "_" "(" StringLiteral ")"
      for (my $i = 0, my $state = 0, my($j, $q, $s); $i <= $#input; $i += 1) {
+#        warn $input[$i];
          my $reset_state_p = 0;
          push @output, [0, $input[$i]];
          if ($input[$i] !~ /\S/s) {
@@ -221,20 +215,23 @@ sub _identify_js_translatables (@) {
            die "identify_js_translatables internal error: Unknown state $state"
          }
      }
+#    use Data::Dumper;
+#    warn Dumper \@output;
      return \@output;
  }
  
  ###############################################################################
  
-sub string_canon ($) {
+sub string_canon {
    my $s = shift;
    # Fold all whitespace into single blanks
    $s =~ s/\s+/ /g;
+  $s =~ s/^\s+//g;
    return $s;
  }
  
  # safer version used internally, preserves new lines
-sub string_canon_safe ($) {
+sub string_canon_safe {
    my $s = shift;
    # fold tabs and spaces into single spaces
    $s =~ s/[\ \t]+/ /gs;
@@ -250,16 +247,16 @@ sub _quote_cformat{
  
  sub _formalize_string_cformat{
    my $s = shift;
-  return _quote_cformat( string_canon_safe $s );
+  return _quote_cformat( string_canon_safe($s) );
  }
  
  sub _formalize{
    my $t = shift;
-  if( $t->type == TmplTokenType::DIRECTIVE ){
+  if( $t->type == C4::TmplTokenType::DIRECTIVE ){
      return '%s';
-  } elsif( $t->type == TmplTokenType::TEXT ){
+  } elsif( $t->type == C4::TmplTokenType::TEXT ){
      return _formalize_string_cformat( $t->string );
-  } elsif( $t->type == TmplTokenType::TAG ){
+  } elsif( $t->type == C4::TmplTokenType::TAG ){
      if( $t->string =~ m/^a\b/is ){
        return '<a>';
      } elsif( $t->string =~ m/^input\b/is ){
@@ -270,20 +267,20 @@ sub _formalize{
        }
      } else{
        return _quote_cformat $t->string;
-    }
+    }    
    } else{
      return _quote_cformat $t->string;
    }
  }
  
  # internal parametization, used within next_token
-# method that takes in an array of TEXT and DIRECTIVE tokens (DIRECTIVEs must be GET) and return a TmplTokenType::TEXT_PARAMETRIZED
+# method that takes in an array of TEXT and DIRECTIVE tokens (DIRECTIVEs must be GET) and return a C4::TmplTokenType::TEXT_PARAMETRIZED
  sub _parametrize_internal{
      my $this = shift;
      my @parts = @_;
      # my $s = "";
      # for my $item (@parts){
-    #     if( $item->type == TmplTokenType::TEXT ){
+    #     if( $item->type == C4::TmplTokenType::TEXT ){
      #         $s .= $item->string;
      #     } else {
      #         #must be a variable directive
@@ -293,7 +290,7 @@ sub _parametrize_internal{
      my $s = join( "", map { _formalize $_ } @parts );
      # should both the string and form be $s? maybe only the later? posibly the former....
      # used line number from first token, should suffice
-    my $t = TmplToken->new( $s, TmplTokenType::TEXT_PARAMETRIZED, $parts[0]->line_number, $this->filename );
+    my $t = C4::TmplToken->new( $s, C4::TmplTokenType::TEXT_PARAMETRIZED, $parts[0]->line_number, $this->filename );
      $t->set_children(@parts);
      $t->set_form($s);
      return $t;
@@ -302,31 +299,49 @@ sub _parametrize_internal{
  sub next_token {
      my $self = shift;
      my $next;
+#    warn "in next_token";
      # parts that make up a text_parametrized (future children of the token)
      my @parts = ();
      while(1){
-        # warn Dumper @parts;
          $next = $self->{_parser}->next_token;
          if (! $next){
              if (@parts){
                  return $self->_parametrize_internal(@parts);
              }
              else {
-                return undef;
+                return;
              }
          }
          # if cformat mode is off, dont bother parametrizing, just return them as they come
          return $next unless $self->allow_cformat_p;
-        if( $next->type == TmplTokenType::TEXT ){
+        if( $next->type == C4::TmplTokenType::TEXT ){
              push @parts, $next;
          } 
-        elsif( $next->type == TmplTokenType::DIRECTIVE && $next->string =~ m/\[%\s*\w+\s*%\]/ ){
+#        elsif( $next->type == C4::TmplTokenType::DIRECTIVE && $next->string =~ m/\[%\s*\w+\s*%\]/ ){
+        elsif( $next->type == C4::TmplTokenType::DIRECTIVE ){
              push @parts, $next;
          } 
+        elsif ( $next->type == C4::TmplTokenType::CDATA){
+            $self->_set_js_mode(1);
+            my $s0 = $next->string;
+            my @head = ();
+            my @tail = ();
+
+            if ($s0 =~ /^(\s*\[%\s*)(.*)(\s%=]\s*)$/s) {
+                push @head, $1;
+                 push @tail, $3;
+                $s0 = $2;
+            }
+            push @head, _split_js($s0);
+            $next->set_js_data(_identify_js_translatables(@head, @tail) );
+           return $next unless @parts;     
+           $self->{_parser}->unshift_token($next);
+            return $self->_parametrize_internal(@parts);
+        }
          else {
              # if there is nothing in parts, return this token
- 
-           return $next unless @parts;
+            return $next unless @parts;
+
              # OTHERWISE, put this token back and return the parametrized string of @parts
              $self->{_parser}->unshift_token($next);
              return $self->_parametrize_internal(@parts);
@@ -339,7 +354,7 @@ sub next_token {
  
  # function taken from old version
  # used by tmpl_process3
-sub parametrize ($$$$) {
+sub parametrize {
      my($fmt_0, $cformat_p, $t, $f) = @_;
      my $it = '';
      if ($cformat_p) {
@@ -359,13 +374,13 @@ sub parametrize ($$$$) {
                     ;
                 } elsif (defined $params[$i - 1]) {
                     my $param = $params[$i - 1];
-                   warn_normal "$fmt_0: $&: Expected a TMPL_VAR, but found a "
-                           . $param->type->to_string . "\n", undef
-                           if $param->type != TmplTokenType::DIRECTIVE;
-                   warn_normal "$fmt_0: $&: Unsupported "
-                               . "field width or precision\n", undef
+            warn_normal("$fmt_0: $&: Expected a TMPL_VAR, but found a "
+                . $param->type->to_string . "\n", undef)
+                if $param->type != C4::TmplTokenType::DIRECTIVE;
+            warn_normal("$fmt_0: $&: Unsupported "
+                . "field width or precision\n", undef)
                             if defined $width || defined $prec;
-                   warn_normal "$fmt_0: $&: Parameter $i not known", undef
+            warn_normal("$fmt_0: $&: Parameter $i not known", undef)
                             unless defined $param;
                     $it .= defined $f? &$f( $param ): $param->string;
                 }
@@ -376,27 +391,27 @@ sub parametrize ($$$$) {
  
                 my $param = $params[$i - 1];
                 if (!defined $param) {
-                   warn_normal "$fmt_0: $&: Parameter $i not known", undef;
+            warn_normal("$fmt_0: $&: Parameter $i not known", undef);
                 } else {
-                   if ($param->type == TmplTokenType::TAG
+                   if ($param->type == C4::TmplTokenType::TAG
                             && $param->string =~ /^<input\b/is) {
                         my $type = defined $param->attributes?
                                 lc($param->attributes->{'type'}->[1]): undef;
                         if ($conv eq 'S') {
-                           warn_normal "$fmt_0: $&: Expected type=text, "
-                                       . "but found type=$type", undef
+                warn_normal("$fmt_0: $&: Expected type=text, "
+                    . "but found type=$type", undef)
                                     unless $type eq 'text';
                         } elsif ($conv eq 'p') {
-                           warn_normal "$fmt_0: $&: Expected type=radio, "
-                                       . "but found type=$type", undef
+                warn_normal("$fmt_0: $&: Expected type=radio, "
+                    . "but found type=$type", undef)
                                     unless $type eq 'radio';
                         }
                     } else {
-                       warn_normal "$&: Expected an INPUT, but found a "
-                               . $param->type->to_string . "\n", undef
+                warn_normal("$&: Expected an INPUT, but found a "
+                    . $param->type->to_string . "\n", undef)
                     }
-                   warn_normal "$fmt_0: $&: Unsupported "
-                               . "field width or precision\n", undef
+            warn_normal("$fmt_0: $&: Unsupported "
+                . "field width or precision\n", undef)
                             if defined $width || defined $prec;
                     $it .= defined $f? &$f( $param ): $param->string;
                 }
@@ -405,7 +420,7 @@ sub parametrize ($$$$) {
                 $it .= $&;
                 die "$&: Unknown or unsupported format specification\n"; #XXX
             } else {
-               die "$&: Completely confused parametrizing\n";#XXX
+               die "$&: Completely confused parametrizing -- msgid: $fmt_0\n";#XXX
             }
         }
      }
@@ -419,7 +434,7 @@ sub parametrize ($$$$) {
             my $i  = $1;
             $fmt = $';
             my $anchor = $anchors[$i - 1];
-           warn_normal "$&: Anchor $1 not found for msgid \"$fmt_0\"", undef #FIXME
+        warn_normal("$&: Anchor $1 not found for msgid \"$fmt_0\"", undef) #FIXME
                     unless defined $anchor;
             $it .= $anchor->string;
         } else {
@@ -432,12 +447,12 @@ sub parametrize ($$$$) {
  
  # Other simple functions (These are not methods)
  
-sub blank_p ($) {
+sub blank_p {
      my($s) = @_;
      return $s =~ /^(?:\s|\&nbsp$re_end_entity|$re_tmpl_var|$re_xsl)*$/osi;
  }
  
-sub trim ($) {
+sub trim {
      my($s0) = @_;
      my $l0 = length $s0;
      my $s = $s0;
@@ -446,16 +461,16 @@ sub trim ($) {
      return wantarray? (substr($s0, 0, $l1), $s, substr($s0, $l0 - $l2)): $s;
  }
  
-sub quote_po ($) {
+sub quote_po {
      my($s) = @_;
      # Locale::PO->quote is buggy, it doesn't quote newlines :-/
-    $s =~ s/([\\"])/\\\1/gs;
+    $s =~ s/([\\"])/\\$1/gs;
      $s =~ s/\n/\\n/g;
      #$s =~ s/[\177-\377]/ sprintf("\\%03o", ord($&)) /egs;
      return "\"$s\"";
  }
  
-sub charset_canon ($) {
+sub charset_canon {
      my($charset) = @_;
      $charset = uc($charset);
      $charset = "$1-$2" if $charset =~ /^(ISO|UTF)(\d.*)/i;
@@ -488,7 +503,7 @@ use vars qw( @latin1_utf8 );
      "\303\270", "\303\271", "\303\272", "\303\273", "\303\274", "\303\275",
      "\303\276", "\303\277" );
  
-sub charset_convert ($$$) {
+sub charset_convert {
      my($s, $charset_in, $charset_out) = @_;
      if ($s !~ /[\200-\377]/s) { # FIXME: don't worry about iso2022 for now
         ;