X-Git-Url: http://koha-dev.rot13.org:8081/gitweb/?a=blobdiff_plain;f=misc%2Ftranslator%2FTmplTokenizer.pm;h=05cc54ea6b6c6afb70964b44b886d45832837250;hb=9d6d641d1f8b77271800f43bc027b651f9aea52b;hp=d54b7ca1b49c4898ac077fa73c7716e9866be2da;hpb=9bb2554e39c583fb994a9d6bb487a75fc7c7251e;p=srvgit diff --git a/misc/translator/TmplTokenizer.pm b/misc/translator/TmplTokenizer.pm index d54b7ca1b4..05cc54ea6b 100644 --- a/misc/translator/TmplTokenizer.pm +++ b/misc/translator/TmplTokenizer.pm @@ -1,15 +1,12 @@ package TmplTokenizer; -use strict; -#use warnings; FIXME - Bug 2505 -use TmplTokenType; -use TmplToken; -use TTParser; +use Modern::Perl; +use C4::TmplTokenType; +use C4::TmplToken; +use C4::TTParser; use VerboseWarnings qw( pedantic_p error_normal warn_normal warn_pedantic ); require Exporter; -use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); - ############################################################################### =head1 NAME @@ -24,10 +21,6 @@ A wrapper for the functionality found in TTParser to allow an easier transition ############################################################################### -$VERSION = 0.02; - -@ISA = qw(Exporter); -@EXPORT_OK = qw(); use vars qw( $pedantic_attribute_error_in_nonpedantic_mode_p ); use vars qw( $pedantic_tmpl_var_use_in_nonpedantic_mode_p ); @@ -68,7 +61,7 @@ sub new { shift; my ($filename) = @_; #open my $handle,$filename or die "can't open $filename"; - my $parser = TTParser->new; + my $parser = C4::TTParser->new; $parser->build_tokens( $filename ); bless { filename => $filename, @@ -123,7 +116,7 @@ sub _set_js_mode { return $this; } -#used in xgettext, tmpl_process3 and text-extract2 +#used in xgettext, tmpl_process3 sub set_allow_cformat { my $this = shift; $this->{+ALLOW_CFORMAT_P} = $_[0]; @@ -140,7 +133,7 @@ BEGIN { sub parenleft () { '(' } sub parenright () { ')' } -sub _split_js ($) { +sub _split_js { my ($s0) = @_; my @it = (); while (length $s0) { @@ -192,12 +185,13 @@ sub STATE_STRING_LITERAL () { 3 } # XXX This is a crazy hack. I don't want to write an ECMAScript parser. # XXX A scanner is one thing; a parser another thing. -sub _identify_js_translatables (@) { +sub _identify_js_translatables { my @input = @_; my @output = (); # We mark a JavaScript translatable string as in C, i.e., _("literal") # For simplicity, we ONLY look for "_" "(" StringLiteral ")" for (my $i = 0, my $state = 0, my($j, $q, $s); $i <= $#input; $i += 1) { +# warn $input[$i]; my $reset_state_p = 0; push @output, [0, $input[$i]]; if ($input[$i] !~ /\S/s) { @@ -221,20 +215,23 @@ sub _identify_js_translatables (@) { die "identify_js_translatables internal error: Unknown state $state" } } +# use Data::Dumper; +# warn Dumper \@output; return \@output; } ############################################################################### -sub string_canon ($) { +sub string_canon { my $s = shift; # Fold all whitespace into single blanks $s =~ s/\s+/ /g; + $s =~ s/^\s+//g; return $s; } # safer version used internally, preserves new lines -sub string_canon_safe ($) { +sub string_canon_safe { my $s = shift; # fold tabs and spaces into single spaces $s =~ s/[\ \t]+/ /gs; @@ -250,16 +247,16 @@ sub _quote_cformat{ sub _formalize_string_cformat{ my $s = shift; - return _quote_cformat( string_canon_safe $s ); + return _quote_cformat( string_canon_safe($s) ); } sub _formalize{ my $t = shift; - if( $t->type == TmplTokenType::DIRECTIVE ){ + if( $t->type == C4::TmplTokenType::DIRECTIVE ){ return '%s'; - } elsif( $t->type == TmplTokenType::TEXT ){ + } elsif( $t->type == C4::TmplTokenType::TEXT ){ return _formalize_string_cformat( $t->string ); - } elsif( $t->type == TmplTokenType::TAG ){ + } elsif( $t->type == C4::TmplTokenType::TAG ){ if( $t->string =~ m/^a\b/is ){ return ''; } elsif( $t->string =~ m/^input\b/is ){ @@ -270,20 +267,20 @@ sub _formalize{ } } else{ return _quote_cformat $t->string; - } + } } else{ return _quote_cformat $t->string; } } # internal parametization, used within next_token -# method that takes in an array of TEXT and DIRECTIVE tokens (DIRECTIVEs must be GET) and return a TmplTokenType::TEXT_PARAMETRIZED +# method that takes in an array of TEXT and DIRECTIVE tokens (DIRECTIVEs must be GET) and return a C4::TmplTokenType::TEXT_PARAMETRIZED sub _parametrize_internal{ my $this = shift; my @parts = @_; # my $s = ""; # for my $item (@parts){ - # if( $item->type == TmplTokenType::TEXT ){ + # if( $item->type == C4::TmplTokenType::TEXT ){ # $s .= $item->string; # } else { # #must be a variable directive @@ -293,7 +290,7 @@ sub _parametrize_internal{ my $s = join( "", map { _formalize $_ } @parts ); # should both the string and form be $s? maybe only the later? posibly the former.... # used line number from first token, should suffice - my $t = TmplToken->new( $s, TmplTokenType::TEXT_PARAMETRIZED, $parts[0]->line_number, $this->filename ); + my $t = C4::TmplToken->new( $s, C4::TmplTokenType::TEXT_PARAMETRIZED, $parts[0]->line_number, $this->filename ); $t->set_children(@parts); $t->set_form($s); return $t; @@ -302,31 +299,49 @@ sub _parametrize_internal{ sub next_token { my $self = shift; my $next; +# warn "in next_token"; # parts that make up a text_parametrized (future children of the token) my @parts = (); while(1){ - # warn Dumper @parts; $next = $self->{_parser}->next_token; if (! $next){ if (@parts){ return $self->_parametrize_internal(@parts); } else { - return undef; + return; } } # if cformat mode is off, dont bother parametrizing, just return them as they come return $next unless $self->allow_cformat_p; - if( $next->type == TmplTokenType::TEXT ){ + if( $next->type == C4::TmplTokenType::TEXT ){ push @parts, $next; } - elsif( $next->type == TmplTokenType::DIRECTIVE && $next->string =~ m/\[%\s*\w+\s*%\]/ ){ +# elsif( $next->type == C4::TmplTokenType::DIRECTIVE && $next->string =~ m/\[%\s*\w+\s*%\]/ ){ + elsif( $next->type == C4::TmplTokenType::DIRECTIVE ){ push @parts, $next; } + elsif ( $next->type == C4::TmplTokenType::CDATA){ + $self->_set_js_mode(1); + my $s0 = $next->string; + my @head = (); + my @tail = (); + + if ($s0 =~ /^(\s*\[%\s*)(.*)(\s%=]\s*)$/s) { + push @head, $1; + push @tail, $3; + $s0 = $2; + } + push @head, _split_js($s0); + $next->set_js_data(_identify_js_translatables(@head, @tail) ); + return $next unless @parts; + $self->{_parser}->unshift_token($next); + return $self->_parametrize_internal(@parts); + } else { # if there is nothing in parts, return this token - - return $next unless @parts; + return $next unless @parts; + # OTHERWISE, put this token back and return the parametrized string of @parts $self->{_parser}->unshift_token($next); return $self->_parametrize_internal(@parts); @@ -339,7 +354,7 @@ sub next_token { # function taken from old version # used by tmpl_process3 -sub parametrize ($$$$) { +sub parametrize { my($fmt_0, $cformat_p, $t, $f) = @_; my $it = ''; if ($cformat_p) { @@ -359,13 +374,13 @@ sub parametrize ($$$$) { ; } elsif (defined $params[$i - 1]) { my $param = $params[$i - 1]; - warn_normal "$fmt_0: $&: Expected a TMPL_VAR, but found a " - . $param->type->to_string . "\n", undef - if $param->type != TmplTokenType::DIRECTIVE; - warn_normal "$fmt_0: $&: Unsupported " - . "field width or precision\n", undef + warn_normal("$fmt_0: $&: Expected a TMPL_VAR, but found a " + . $param->type->to_string . "\n", undef) + if $param->type != C4::TmplTokenType::DIRECTIVE; + warn_normal("$fmt_0: $&: Unsupported " + . "field width or precision\n", undef) if defined $width || defined $prec; - warn_normal "$fmt_0: $&: Parameter $i not known", undef + warn_normal("$fmt_0: $&: Parameter $i not known", undef) unless defined $param; $it .= defined $f? &$f( $param ): $param->string; } @@ -376,27 +391,27 @@ sub parametrize ($$$$) { my $param = $params[$i - 1]; if (!defined $param) { - warn_normal "$fmt_0: $&: Parameter $i not known", undef; + warn_normal("$fmt_0: $&: Parameter $i not known", undef); } else { - if ($param->type == TmplTokenType::TAG + if ($param->type == C4::TmplTokenType::TAG && $param->string =~ /^attributes? lc($param->attributes->{'type'}->[1]): undef; if ($conv eq 'S') { - warn_normal "$fmt_0: $&: Expected type=text, " - . "but found type=$type", undef + warn_normal("$fmt_0: $&: Expected type=text, " + . "but found type=$type", undef) unless $type eq 'text'; } elsif ($conv eq 'p') { - warn_normal "$fmt_0: $&: Expected type=radio, " - . "but found type=$type", undef + warn_normal("$fmt_0: $&: Expected type=radio, " + . "but found type=$type", undef) unless $type eq 'radio'; } } else { - warn_normal "$&: Expected an INPUT, but found a " - . $param->type->to_string . "\n", undef + warn_normal("$&: Expected an INPUT, but found a " + . $param->type->to_string . "\n", undef) } - warn_normal "$fmt_0: $&: Unsupported " - . "field width or precision\n", undef + warn_normal("$fmt_0: $&: Unsupported " + . "field width or precision\n", undef) if defined $width || defined $prec; $it .= defined $f? &$f( $param ): $param->string; } @@ -405,7 +420,7 @@ sub parametrize ($$$$) { $it .= $&; die "$&: Unknown or unsupported format specification\n"; #XXX } else { - die "$&: Completely confused parametrizing\n";#XXX + die "$&: Completely confused parametrizing -- msgid: $fmt_0\n";#XXX } } } @@ -419,7 +434,7 @@ sub parametrize ($$$$) { my $i = $1; $fmt = $'; my $anchor = $anchors[$i - 1]; - warn_normal "$&: Anchor $1 not found for msgid \"$fmt_0\"", undef #FIXME + warn_normal("$&: Anchor $1 not found for msgid \"$fmt_0\"", undef) #FIXME unless defined $anchor; $it .= $anchor->string; } else { @@ -432,12 +447,12 @@ sub parametrize ($$$$) { # Other simple functions (These are not methods) -sub blank_p ($) { +sub blank_p { my($s) = @_; return $s =~ /^(?:\s|\ $re_end_entity|$re_tmpl_var|$re_xsl)*$/osi; } -sub trim ($) { +sub trim { my($s0) = @_; my $l0 = length $s0; my $s = $s0; @@ -446,16 +461,16 @@ sub trim ($) { return wantarray? (substr($s0, 0, $l1), $s, substr($s0, $l0 - $l2)): $s; } -sub quote_po ($) { +sub quote_po { my($s) = @_; # Locale::PO->quote is buggy, it doesn't quote newlines :-/ - $s =~ s/([\\"])/\\\1/gs; + $s =~ s/([\\"])/\\$1/gs; $s =~ s/\n/\\n/g; #$s =~ s/[\177-\377]/ sprintf("\\%03o", ord($&)) /egs; return "\"$s\""; } -sub charset_canon ($) { +sub charset_canon { my($charset) = @_; $charset = uc($charset); $charset = "$1-$2" if $charset =~ /^(ISO|UTF)(\d.*)/i; @@ -488,7 +503,7 @@ use vars qw( @latin1_utf8 ); "\303\270", "\303\271", "\303\272", "\303\273", "\303\274", "\303\275", "\303\276", "\303\277" ); -sub charset_convert ($$$) { +sub charset_convert { my($s, $charset_in, $charset_out) = @_; if ($s !~ /[\200-\377]/s) { # FIXME: don't worry about iso2022 for now ;