package TmplTokenizer;
-use strict;
-#use warnings; FIXME - Bug 2505
-use TmplTokenType;
-use TmplToken;
-use TTParser;
+use Modern::Perl;
+use C4::TmplTokenType;
+use C4::TmplToken;
+use C4::TTParser;
use VerboseWarnings qw( pedantic_p error_normal warn_normal warn_pedantic );
require Exporter;
-use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
-
###############################################################################
=head1 NAME
###############################################################################
-$VERSION = 0.02;
-
-@ISA = qw(Exporter);
-@EXPORT_OK = qw();
use vars qw( $pedantic_attribute_error_in_nonpedantic_mode_p );
use vars qw( $pedantic_tmpl_var_use_in_nonpedantic_mode_p );
shift;
my ($filename) = @_;
#open my $handle,$filename or die "can't open $filename";
- my $parser = TTParser->new;
+ my $parser = C4::TTParser->new;
$parser->build_tokens( $filename );
bless {
filename => $filename,
return $this;
}
-#used in xgettext, tmpl_process3 and text-extract2
+#used in xgettext, tmpl_process3
sub set_allow_cformat {
my $this = shift;
$this->{+ALLOW_CFORMAT_P} = $_[0];
sub parenleft () { '(' }
sub parenright () { ')' }
-sub _split_js ($) {
+sub _split_js {
my ($s0) = @_;
my @it = ();
while (length $s0) {
# XXX This is a crazy hack. I don't want to write an ECMAScript parser.
# XXX A scanner is one thing; a parser another thing.
-sub _identify_js_translatables (@) {
+sub _identify_js_translatables {
my @input = @_;
my @output = ();
# We mark a JavaScript translatable string as in C, i.e., _("literal")
# For simplicity, we ONLY look for "_" "(" StringLiteral ")"
for (my $i = 0, my $state = 0, my($j, $q, $s); $i <= $#input; $i += 1) {
+# warn $input[$i];
my $reset_state_p = 0;
push @output, [0, $input[$i]];
if ($input[$i] !~ /\S/s) {
die "identify_js_translatables internal error: Unknown state $state"
}
}
+# use Data::Dumper;
+# warn Dumper \@output;
return \@output;
}
###############################################################################
-sub string_canon ($) {
+sub string_canon {
my $s = shift;
# Fold all whitespace into single blanks
$s =~ s/\s+/ /g;
+ $s =~ s/^\s+//g;
return $s;
}
# safer version used internally, preserves new lines
-sub string_canon_safe ($) {
+sub string_canon_safe {
my $s = shift;
# fold tabs and spaces into single spaces
$s =~ s/[\ \t]+/ /gs;
sub _formalize_string_cformat{
my $s = shift;
- return _quote_cformat( string_canon_safe $s );
+ return _quote_cformat( string_canon_safe($s) );
}
sub _formalize{
my $t = shift;
- if( $t->type == TmplTokenType::DIRECTIVE ){
+ if( $t->type == C4::TmplTokenType::DIRECTIVE ){
return '%s';
- } elsif( $t->type == TmplTokenType::TEXT ){
+ } elsif( $t->type == C4::TmplTokenType::TEXT ){
return _formalize_string_cformat( $t->string );
- } elsif( $t->type == TmplTokenType::TAG ){
+ } elsif( $t->type == C4::TmplTokenType::TAG ){
if( $t->string =~ m/^a\b/is ){
return '<a>';
} elsif( $t->string =~ m/^input\b/is ){
}
} else{
return _quote_cformat $t->string;
- }
+ }
} else{
return _quote_cformat $t->string;
}
}
# internal parametization, used within next_token
-# method that takes in an array of TEXT and DIRECTIVE tokens (DIRECTIVEs must be GET) and return a TmplTokenType::TEXT_PARAMETRIZED
+# method that takes in an array of TEXT and DIRECTIVE tokens (DIRECTIVEs must be GET) and return a C4::TmplTokenType::TEXT_PARAMETRIZED
sub _parametrize_internal{
my $this = shift;
my @parts = @_;
# my $s = "";
# for my $item (@parts){
- # if( $item->type == TmplTokenType::TEXT ){
+ # if( $item->type == C4::TmplTokenType::TEXT ){
# $s .= $item->string;
# } else {
# #must be a variable directive
my $s = join( "", map { _formalize $_ } @parts );
# should both the string and form be $s? maybe only the later? posibly the former....
# used line number from first token, should suffice
- my $t = TmplToken->new( $s, TmplTokenType::TEXT_PARAMETRIZED, $parts[0]->line_number, $this->filename );
+ my $t = C4::TmplToken->new( $s, C4::TmplTokenType::TEXT_PARAMETRIZED, $parts[0]->line_number, $this->filename );
$t->set_children(@parts);
$t->set_form($s);
return $t;
sub next_token {
my $self = shift;
my $next;
+# warn "in next_token";
# parts that make up a text_parametrized (future children of the token)
my @parts = ();
while(1){
- # warn Dumper @parts;
$next = $self->{_parser}->next_token;
if (! $next){
if (@parts){
return $self->_parametrize_internal(@parts);
}
else {
- return undef;
+ return;
}
}
# if cformat mode is off, dont bother parametrizing, just return them as they come
return $next unless $self->allow_cformat_p;
- if( $next->type == TmplTokenType::TEXT ){
+ if( $next->type == C4::TmplTokenType::TEXT ){
push @parts, $next;
}
- elsif( $next->type == TmplTokenType::DIRECTIVE && $next->string =~ m/\[%\s*\w+\s*%\]/ ){
+# elsif( $next->type == C4::TmplTokenType::DIRECTIVE && $next->string =~ m/\[%\s*\w+\s*%\]/ ){
+ elsif( $next->type == C4::TmplTokenType::DIRECTIVE ){
push @parts, $next;
}
+ elsif ( $next->type == C4::TmplTokenType::CDATA){
+ $self->_set_js_mode(1);
+ my $s0 = $next->string;
+ my @head = ();
+ my @tail = ();
+
+ if ($s0 =~ /^(\s*\[%\s*)(.*)(\s%=]\s*)$/s) {
+ push @head, $1;
+ push @tail, $3;
+ $s0 = $2;
+ }
+ push @head, _split_js($s0);
+ $next->set_js_data(_identify_js_translatables(@head, @tail) );
+ return $next unless @parts;
+ $self->{_parser}->unshift_token($next);
+ return $self->_parametrize_internal(@parts);
+ }
else {
# if there is nothing in parts, return this token
-
- return $next unless @parts;
+ return $next unless @parts;
+
# OTHERWISE, put this token back and return the parametrized string of @parts
$self->{_parser}->unshift_token($next);
return $self->_parametrize_internal(@parts);
# function taken from old version
# used by tmpl_process3
-sub parametrize ($$$$) {
+sub parametrize {
my($fmt_0, $cformat_p, $t, $f) = @_;
my $it = '';
if ($cformat_p) {
;
} elsif (defined $params[$i - 1]) {
my $param = $params[$i - 1];
- warn_normal "$fmt_0: $&: Expected a TMPL_VAR, but found a "
- . $param->type->to_string . "\n", undef
- if $param->type != TmplTokenType::DIRECTIVE;
- warn_normal "$fmt_0: $&: Unsupported "
- . "field width or precision\n", undef
+ warn_normal("$fmt_0: $&: Expected a TMPL_VAR, but found a "
+ . $param->type->to_string . "\n", undef)
+ if $param->type != C4::TmplTokenType::DIRECTIVE;
+ warn_normal("$fmt_0: $&: Unsupported "
+ . "field width or precision\n", undef)
if defined $width || defined $prec;
- warn_normal "$fmt_0: $&: Parameter $i not known", undef
+ warn_normal("$fmt_0: $&: Parameter $i not known", undef)
unless defined $param;
$it .= defined $f? &$f( $param ): $param->string;
}
my $param = $params[$i - 1];
if (!defined $param) {
- warn_normal "$fmt_0: $&: Parameter $i not known", undef;
+ warn_normal("$fmt_0: $&: Parameter $i not known", undef);
} else {
- if ($param->type == TmplTokenType::TAG
+ if ($param->type == C4::TmplTokenType::TAG
&& $param->string =~ /^<input\b/is) {
my $type = defined $param->attributes?
lc($param->attributes->{'type'}->[1]): undef;
if ($conv eq 'S') {
- warn_normal "$fmt_0: $&: Expected type=text, "
- . "but found type=$type", undef
+ warn_normal("$fmt_0: $&: Expected type=text, "
+ . "but found type=$type", undef)
unless $type eq 'text';
} elsif ($conv eq 'p') {
- warn_normal "$fmt_0: $&: Expected type=radio, "
- . "but found type=$type", undef
+ warn_normal("$fmt_0: $&: Expected type=radio, "
+ . "but found type=$type", undef)
unless $type eq 'radio';
}
} else {
- warn_normal "$&: Expected an INPUT, but found a "
- . $param->type->to_string . "\n", undef
+ warn_normal("$&: Expected an INPUT, but found a "
+ . $param->type->to_string . "\n", undef)
}
- warn_normal "$fmt_0: $&: Unsupported "
- . "field width or precision\n", undef
+ warn_normal("$fmt_0: $&: Unsupported "
+ . "field width or precision\n", undef)
if defined $width || defined $prec;
$it .= defined $f? &$f( $param ): $param->string;
}
$it .= $&;
die "$&: Unknown or unsupported format specification\n"; #XXX
} else {
- die "$&: Completely confused parametrizing\n";#XXX
+ die "$&: Completely confused parametrizing -- msgid: $fmt_0\n";#XXX
}
}
}
my $i = $1;
$fmt = $';
my $anchor = $anchors[$i - 1];
- warn_normal "$&: Anchor $1 not found for msgid \"$fmt_0\"", undef #FIXME
+ warn_normal("$&: Anchor $1 not found for msgid \"$fmt_0\"", undef) #FIXME
unless defined $anchor;
$it .= $anchor->string;
} else {
# Other simple functions (These are not methods)
-sub blank_p ($) {
+sub blank_p {
my($s) = @_;
return $s =~ /^(?:\s|\ $re_end_entity|$re_tmpl_var|$re_xsl)*$/osi;
}
-sub trim ($) {
+sub trim {
my($s0) = @_;
my $l0 = length $s0;
my $s = $s0;
return wantarray? (substr($s0, 0, $l1), $s, substr($s0, $l0 - $l2)): $s;
}
-sub quote_po ($) {
+sub quote_po {
my($s) = @_;
# Locale::PO->quote is buggy, it doesn't quote newlines :-/
- $s =~ s/([\\"])/\\\1/gs;
+ $s =~ s/([\\"])/\\$1/gs;
$s =~ s/\n/\\n/g;
#$s =~ s/[\177-\377]/ sprintf("\\%03o", ord($&)) /egs;
return "\"$s\"";
}
-sub charset_canon ($) {
+sub charset_canon {
my($charset) = @_;
$charset = uc($charset);
$charset = "$1-$2" if $charset =~ /^(ISO|UTF)(\d.*)/i;
"\303\270", "\303\271", "\303\272", "\303\273", "\303\274", "\303\275",
"\303\276", "\303\277" );
-sub charset_convert ($$$) {
+sub charset_convert {
my($s, $charset_in, $charset_out) = @_;
if ($s !~ /[\200-\377]/s) { # FIXME: don't worry about iso2022 for now
;