X-Git-Url: http://koha-dev.rot13.org:8081/gitweb/?a=blobdiff_plain;f=misc%2Ftranslator%2FTmplTokenizer.pm;h=2cdff317e39b7ffbe57a20ebbaf52bfe0b48622e;hb=583abead1b3d537f78a3d558981b2e9abb2791ff;hp=77fa1c8899e7c4709c68a1fdf8a902942d9942d6;hpb=48cc802e46d09c9b1acd4b13105fefab88acbe18;p=srvgit diff --git a/misc/translator/TmplTokenizer.pm b/misc/translator/TmplTokenizer.pm index 77fa1c8899..2cdff317e3 100644 --- a/misc/translator/TmplTokenizer.pm +++ b/misc/translator/TmplTokenizer.pm @@ -2,9 +2,9 @@ package TmplTokenizer; use strict; #use warnings; FIXME - Bug 2505 -use TmplTokenType; -use TmplToken; -use TTParser; +use C4::TmplTokenType; +use C4::TmplToken; +use C4::TTParser; use VerboseWarnings qw( pedantic_p error_normal warn_normal warn_pedantic ); require Exporter; @@ -48,27 +48,27 @@ use vars qw( $serial ); ############################################################################### -sub FATAL_P () {'fatal-p'} -sub SYNTAXERROR_P () {'syntaxerror-p'} +sub FATAL_P {'fatal-p'} +sub SYNTAXERROR_P {'syntaxerror-p'} -sub FILENAME () {'input'} -#sub HANDLE () {'handle'} +sub FILENAME {'input'} +#sub HANDLE {'handle'} -#sub READAHEAD () {'readahead'} -sub LINENUM_START () {'lc_0'} -sub LINENUM () {'lc'} -sub CDATA_MODE_P () {'cdata-mode-p'} -sub CDATA_CLOSE () {'cdata-close'} -#sub PCDATA_MODE_P () {'pcdata-mode-p'} # additional submode for CDATA -sub JS_MODE_P () {'js-mode-p'} # cdata-mode-p must also be true +#sub READAHEAD {'readahead'} +sub LINENUM_START {'lc_0'} +sub LINENUM {'lc'} +sub CDATA_MODE_P {'cdata-mode-p'} +sub CDATA_CLOSE {'cdata-close'} +#sub PCDATA_MODE_P {'pcdata-mode-p'} # additional submode for CDATA +sub JS_MODE_P {'js-mode-p'} # cdata-mode-p must also be true -sub ALLOW_CFORMAT_P () {'allow-cformat-p'} +sub ALLOW_CFORMAT_P {'allow-cformat-p'} sub new { shift; my ($filename) = @_; #open my $handle,$filename or die "can't open $filename"; - my $parser = TTParser->new; + my $parser = C4::TTParser->new; $parser->build_tokens( $filename ); bless { 
filename => $filename, @@ -137,10 +137,10 @@ BEGIN { # Perl quoting is really screwed up, but this common subexp is way too long $js_EscapeSequence = q{\\\\(?:['"\\\\bfnrt]|[^0-7xu]|[0-3]?[0-7]{1,2}|x[\da-fA-F]{2}|u[\da-fA-F]{4})}; } -sub parenleft () { '(' } -sub parenright () { ')' } +sub parenleft { '(' } +sub parenright { ')' } -sub _split_js ($) { +sub _split_js { my ($s0) = @_; my @it = (); while (length $s0) { @@ -186,13 +186,13 @@ sub _split_js ($) { return @it; } -sub STATE_UNDERSCORE () { 1 } -sub STATE_PARENLEFT () { 2 } -sub STATE_STRING_LITERAL () { 3 } +sub STATE_UNDERSCORE { 1 } +sub STATE_PARENLEFT { 2 } +sub STATE_STRING_LITERAL { 3 } # XXX This is a crazy hack. I don't want to write an ECMAScript parser. # XXX A scanner is one thing; a parser another thing. -sub _identify_js_translatables (@) { +sub _identify_js_translatables { my @input = @_; my @output = (); # We mark a JavaScript translatable string as in C, i.e., _("literal") @@ -229,15 +229,16 @@ sub _identify_js_translatables (@) { ############################################################################### -sub string_canon ($) { +sub string_canon { my $s = shift; # Fold all whitespace into single blanks $s =~ s/\s+/ /g; + $s =~ s/^\s+//g; return $s; } # safer version used internally, preserves new lines -sub string_canon_safe ($) { +sub string_canon_safe { my $s = shift; # fold tabs and spaces into single spaces $s =~ s/[\ \t]+/ /gs; @@ -258,11 +259,11 @@ sub _formalize_string_cformat{ sub _formalize{ my $t = shift; - if( $t->type == TmplTokenType::DIRECTIVE ){ + if( $t->type == C4::TmplTokenType::DIRECTIVE ){ return '%s'; - } elsif( $t->type == TmplTokenType::TEXT ){ + } elsif( $t->type == C4::TmplTokenType::TEXT ){ return _formalize_string_cformat( $t->string ); - } elsif( $t->type == TmplTokenType::TAG ){ + } elsif( $t->type == C4::TmplTokenType::TAG ){ if( $t->string =~ m/^a\b/is ){ return ''; } elsif( $t->string =~ m/^input\b/is ){ @@ -280,13 +281,13 @@ sub _formalize{ } # 
internal parametization, used within next_token -# method that takes in an array of TEXT and DIRECTIVE tokens (DIRECTIVEs must be GET) and return a TmplTokenType::TEXT_PARAMETRIZED +# method that takes in an array of TEXT and DIRECTIVE tokens (DIRECTIVEs must be GET) and return a C4::TmplTokenType::TEXT_PARAMETRIZED sub _parametrize_internal{ my $this = shift; my @parts = @_; # my $s = ""; # for my $item (@parts){ - # if( $item->type == TmplTokenType::TEXT ){ + # if( $item->type == C4::TmplTokenType::TEXT ){ # $s .= $item->string; # } else { # #must be a variable directive @@ -296,7 +297,7 @@ sub _parametrize_internal{ my $s = join( "", map { _formalize $_ } @parts ); # should both the string and form be $s? maybe only the later? posibly the former.... # used line number from first token, should suffice - my $t = TmplToken->new( $s, TmplTokenType::TEXT_PARAMETRIZED, $parts[0]->line_number, $this->filename ); + my $t = C4::TmplToken->new( $s, C4::TmplTokenType::TEXT_PARAMETRIZED, $parts[0]->line_number, $this->filename ); $t->set_children(@parts); $t->set_form($s); return $t; @@ -320,13 +321,14 @@ sub next_token { } # if cformat mode is off, dont bother parametrizing, just return them as they come return $next unless $self->allow_cformat_p; - if( $next->type == TmplTokenType::TEXT ){ + if( $next->type == C4::TmplTokenType::TEXT ){ push @parts, $next; } - elsif( $next->type == TmplTokenType::DIRECTIVE && $next->string =~ m/\[%\s*\w+\s*%\]/ ){ +# elsif( $next->type == C4::TmplTokenType::DIRECTIVE && $next->string =~ m/\[%\s*\w+\s*%\]/ ){ + elsif( $next->type == C4::TmplTokenType::DIRECTIVE ){ push @parts, $next; } - elsif ( $next->type == TmplTokenType::CDATA){ + elsif ( $next->type == C4::TmplTokenType::CDATA){ $self->_set_js_mode(1); my $s0 = $next->string; my @head = (); @@ -359,7 +361,7 @@ sub next_token { # function taken from old version # used by tmpl_process3 -sub parametrize ($$$$) { +sub parametrize { my($fmt_0, $cformat_p, $t, $f) = @_; my $it = ''; if 
($cformat_p) { @@ -381,7 +383,7 @@ sub parametrize ($$$$) { my $param = $params[$i - 1]; warn_normal "$fmt_0: $&: Expected a TMPL_VAR, but found a " . $param->type->to_string . "\n", undef - if $param->type != TmplTokenType::DIRECTIVE; + if $param->type != C4::TmplTokenType::DIRECTIVE; warn_normal "$fmt_0: $&: Unsupported " . "field width or precision\n", undef if defined $width || defined $prec; @@ -398,7 +400,7 @@ sub parametrize ($$$$) { if (!defined $param) { warn_normal "$fmt_0: $&: Parameter $i not known", undef; } else { - if ($param->type == TmplTokenType::TAG + if ($param->type == C4::TmplTokenType::TAG && $param->string =~ /^<input\b/is) { my $type = defined $param->attributes? lc($param->attributes->{'type'}->[1]): undef; @@ -425,7 +427,7 @@ sub parametrize ($$$$) { $it .= $&; die "$&: Unknown or unsupported format specification\n"; #XXX } else { - die "$&: Completely confused parametrizing\n";#XXX + die "$&: Completely confused parametrizing -- msgid: $fmt_0\n";#XXX } } } @@ -452,12 +454,12 @@ sub parametrize ($$$$) { # Other simple functions (These are not methods) -sub blank_p ($) { +sub blank_p { my($s) = @_; return $s =~ /^(?:\s|\&nbsp$re_end_entity|$re_tmpl_var|$re_xsl)*$/osi; } -sub trim ($) { +sub trim { my($s0) = @_; my $l0 = length $s0; my $s = $s0; @@ -466,7 +468,7 @@ sub trim ($) { return wantarray? 
(substr($s0, 0, $l1), $s, substr($s0, $l0 - $l2)): $s; } -sub quote_po ($) { +sub quote_po { my($s) = @_; # Locale::PO->quote is buggy, it doesn't quote newlines :-/ $s =~ s/([\\"])/\\\1/gs; @@ -475,7 +477,7 @@ sub quote_po ($) { return "\"$s\""; } -sub charset_canon ($) { +sub charset_canon { my($charset) = @_; $charset = uc($charset); $charset = "$1-$2" if $charset =~ /^(ISO|UTF)(\d.*)/i; @@ -508,7 +510,7 @@ use vars qw( @latin1_utf8 ); "\303\270", "\303\271", "\303\272", "\303\273", "\303\274", "\303\275", "\303\276", "\303\277" ); -sub charset_convert ($$$) { +sub charset_convert { my($s, $charset_in, $charset_out) = @_; if ($s !~ /[\200-\377]/s) { # FIXME: don't worry about iso2022 for now ;