- my($h) = @_;
- my($it, $kind);
- my $eof_p = 0;
- $this->_pop_readahead if $this->has_readahead_p
- && !ref $this->_peek_readahead
- && !length $this->_peek_readahead;
- if (!$this->has_readahead_p) {
- my $next = scalar <$h>;
- $eof_p = !defined $next;
- if (!$eof_p) {
- $this->_increment_line_number;
- $this->_push_readahead( $next );
- }
- }
- $this->_set_line_number_start( $this->line_number ); # remember 1st line num
- if ($this->has_readahead_p && ref $this->_peek_readahead) { # TmplToken obj.
- ($it, $kind) = ($this->_pop_readahead, undef);
- } elsif ($eof_p && !$this->has_readahead_p) { # nothing left to do
- ;
- } elsif ($this->_peek_readahead =~ /^\s+/s) { # whitespace
- ($kind, $it) = (TmplTokenType::TEXT, $&);
- $this->_set_readahead( $' );
- # FIXME the following (the [<\s] part) is an unreliable HACK :-(
- } elsif ($this->_peek_readahead =~ /^(?:[^<]|<[<\s])*(?:[^<\s])/s) { # non-space normal text
- ($kind, $it) = (TmplTokenType::TEXT, $&);
- $this->_set_readahead( $' );
- warn_normal "Unescaped < in $it\n", $this->line_number_start
- if !$this->cdata_mode_p && $it =~ /</s;
- } else { # tag/declaration/processing instruction
- my $ok_p = 0;
- my $bad_comment_p = 0;
- for (my $cdata_close = $this->cdata_close;;) {
- if ($this->cdata_mode_p) {
- my $next = $this->_pop_readahead;
- if ($next =~ /^$cdata_close/is) {
- ($kind, $it) = (TmplTokenType::TAG, $&);
- $this->_push_readahead( $' );
- $ok_p = 1;
- } elsif ($next =~ /^((?:(?!$cdata_close).)+)($cdata_close)/is) {
- ($kind, $it) = (TmplTokenType::TEXT, $1);
- $this->_push_readahead( "$2$'" );
- $ok_p = 1;
- } else {
- ($kind, $it) = (TmplTokenType::TEXT, $next);
- $ok_p = 1;
- }
- } elsif ($this->_peek_readahead =~ /^$re_tag_compat/os) {
- # If we detect a "closed start tag" but we know that the
- # following token looks like a TMPL_VAR, don't stop
- my($head, $tail, $post) = ($1, $2, $3);
- if ($tail eq '' && $post =~ $re_tmpl_var) {
- # Don't bother to show the warning if we're too confused
- # FIXME. There's no method for _closed_start_tag_warning
- if (!defined $this->{'_closed_start_tag_warning'}
- || ($this->{'_closed_start_tag_warning'}->[0] eq $head
- && $this->{'_closed_start_tag_warning'}->[1] != $this->line_number - 1)) {
- warn_normal "Possible SGML \"closed start tag\" notation: $head<\n", $this->line_number
- if split(/\n/, $head) < 10;
- }
- $this->{'_closed_start_tag_warning'} = [$head, $this->line_number];
- } else {
- ($kind, $it) = (TmplTokenType::TAG, "$head>");
- $this->_set_readahead( $post );
- $ok_p = 1;
- warn_normal "SGML \"closed start tag\" notation: $head<\n", $this->line_number if $tail eq ''
- and $head ne '<!DOCTYPE stylesheet ['; # another bit of temporary ugliness for bug 4472
- }
- } elsif ($this->_peek_readahead =~ /^<!--(?:(?!-->)$re_directive*.)*-->/os) {
- ($kind, $it) = (TmplTokenType::COMMENT, $&);
- $this->_set_readahead( $' );
- $ok_p = 1;
- $bad_comment_p = 1;
- }
- last if $ok_p;
- my $next = scalar <$h>;
- $eof_p = !defined $next;
- last if $eof_p;
- $this->_increment_line_number;
- $this->_append_readahead( $next );
- }
- if ($kind ne TmplTokenType::TAG) {
- ;
- } elsif ($it =~ /^<!/) {
- $kind = TmplTokenType::DECL;
- $kind = TmplTokenType::COMMENT if $it =~ /^<!--(?:(?!-->).)*-->/;
- if ($kind == TmplTokenType::COMMENT && $it =~ /^<!--\s*#include/s) {
- warn_normal "Apache #include directive found instead of HTML::Template <TMPL_INCLUDE> directive", $this->line_number_start;
- }
- } elsif ($it =~ /^<\?/) {
- $kind = TmplTokenType::PI;
- }
- if ($it =~ /^$re_directive/ios && !$this->cdata_mode_p) {
- $kind = TmplTokenType::DIRECTIVE;
- } elsif ($bad_comment_p) {
- warn_normal sprintf("Syntax error in comment: %s\n", $it),
- $this->line_number_start;
- $this->_set_syntaxerror( 1 );
- }
- if (!$ok_p && $eof_p) {
- ($kind, $it) = (TmplTokenType::UNKNOWN, $this->_peek_readahead);
- $this->_set_readahead, undef;
- $this->_set_syntaxerror( 1 );
- }
- }
- warn_normal "Unrecognizable token found: "
- . (split(/\n/, $it) < 10? $it: '(too confused to show details)')
- . "\n", $this->line_number_start
- if $kind == TmplTokenType::UNKNOWN;
- return defined $it? (ref $it? $it: TmplToken->new($it, $kind, $this->line_number, $this->filename)): undef;
-}
-
-sub _next_token_intermediate {
- my $this = shift;
- my $h = $this->_handle;
- my $it;
- if (!$this->cdata_mode_p) {
- $it = $this->_next_token_internal($h);
- if (defined $it && $it->type == TmplTokenType::TAG) {
- if ($it->string =~ /^<(script|style|textarea)\b/is ||
- ($this->filename =~ /(opensearch)|(opac-showreviews-rss)/ && $it->string =~ /^<(description)\b/) # FIXME special case to handle
- # a CDATA in opac-opensearch.tmpl and opac-showreviews-rss.tmpl
- ) {
- $this->_set_cdata_mode( 1 );
- $this->_set_cdata_close( "</$1\\s*>" );
- $this->_set_pcdata_mode( 0 );
- $this->_set_js_mode( lc($1) eq 'script' );
-# } elsif ($it->string =~ /^<(title)\b/is) {
-# $this->_set_cdata_mode( 1 );
-# $this->_set_cdata_close( "</$1\\s*>" );
-# $this->_set_pcdata_mode( 1 );
- }
- $it->set_attributes( $this->_extract_attributes($it->string, $it->line_number) );
- }
- } else {
- my $eof_p = 0;
- for ($it = '', my $cdata_close = $this->cdata_close;;) {
- my $next = $this->_next_token_internal($h);
- $eof_p = !defined $next;
- last if $eof_p;
- if (defined $next && $next->string =~ /$cdata_close/is) {
- $this->_push_readahead( $next ); # push entire TmplToken object
- $this->_set_cdata_mode( 0 );
- }
- last unless $this->cdata_mode_p;
- $it .= $next->string;
- }
- if ($eof_p) {
- $it = undef;
- error_normal "Unexpected end of file while looking for "
- . $this->cdata_close
- . "\n", $this->line_number_start;
- $this->_set_fatal( 1 );
- $this->_set_syntaxerror( 1 );
- }
- if ($this->pcdata_mode_p) {
- my $check = $it;
- $check =~ s/$re_directive//gos;
- warn_pedantic "Markup found in PCDATA\n", $this->line_number,
- \$pedantic_error_markup_in_pcdata_p
- if $check =~ /$re_tag_compat/s;
- }
- # PCDATA should be treated as text, not CDATA
- # Actually it should be treated as TEXT_PARAMETRIZED :-(
- $it = TmplToken->new( $it,
- ($this->pcdata_mode_p?
- TmplTokenType::TEXT: TmplTokenType::CDATA),
- $this->line_number, $this->filename )
- if defined $it;
- if ($this->js_mode_p) {
- my $s0 = $it->string;
- my @head = ();
- my @tail = ();
- if ($s0 =~ /^(\s*<!--\s*)(.*)(\s*--\s*>\s*)$/s) {
- push @head, $1;
- push @tail, $3;
- $s0 = $2;
- }
- push @head, split_js $s0;
- $it->set_js_data( identify_js_translatables(@head, @tail) );
- }
- $this->_set_pcdata_mode, 0;
- $this->_set_cdata_close, undef unless !defined $it;
- }
- return $it;
-}
-
-sub _token_groupable1_p ($) { # as first token, groupable into TEXT_PARAMETRIZED
- my($t) = @_;
- return ($t->type == TmplTokenType::TEXT && $t->string !~ /^[,\.:\|\s]+$/is)
- || ($t->type == TmplTokenType::DIRECTIVE
- && $t->string =~ /^(?:$re_tmpl_var)$/os)
- || ($t->type == TmplTokenType::TAG
- && ($t->string =~ /^<(?:a|b|em|h[123456]|i|u)\b/is
- || ($t->string =~ /^<input\b/is
- && $t->attributes->{'type'}->[1] =~ /^(?:radio|text)$/is)
- ))
-}
-
-sub _token_groupable2_p ($) { # as other token, groupable into TEXT_PARAMETRIZED
- my($t) = @_;
- return ($t->type == TmplTokenType::TEXT && ($t->string =~ /^\s*$/s || $t->string !~ /^[\|\s]+$/is))
- || ($t->type == TmplTokenType::DIRECTIVE
- && $t->string =~ /^(?:$re_tmpl_var)$/os)
- || ($t->type == TmplTokenType::TAG
- && ($t->string =~ /^<\/?(?:a|b|em|h[123456]|i|u)\b/is
- || ($t->string =~ /^<input\b/is
- && $t->attributes->{'type'}->[1] =~ /^(?:radio|text)$/is)))
-}
-
-sub _quote_cformat ($) {
- my($s) = @_;
- $s =~ s/%/%%/g;
- return $s;
-}
-
-sub string_canon ($) {
- my($s) = @_;
- if (1) { # FIXME
- # Fold all whitespace into single blanks
- $s =~ s/\s+/ /gs;
- }
- return $s;
-}
-
-sub _formalize_string_cformat ($) {
- my($s) = @_;
- return _quote_cformat string_canon $s;
-}
-
-sub _formalize ($) {
- my($t) = @_;
- return $t->type == TmplTokenType::DIRECTIVE? '%s':
- $t->type == TmplTokenType::TEXT?
- _formalize_string_cformat($t->string):
- $t->type == TmplTokenType::TAG?
- ($t->string =~ /^<a\b/is? '<a>':
- $t->string =~ /^<input\b/is? (
- lc $t->attributes->{'type'}->[1] eq 'text' ? '%S':
- '%p'):
- _quote_cformat($t->string)):
- _quote_cformat($t->string);
-}
-
-sub _optimize {
- my $this = shift;
- my @structure = @_;
- my $undo_trailing_blanks = sub {
- for (my $i = $#structure; $i >= 0; $i -= 1) {
- last unless ($structure[$i]->type == TmplTokenType::TEXT && blank_p($structure[$i]->string)) ;#|| ($structure[$i]->type == TmplTokenType::TAG && $structure[$i]->string =~ /^<br\b/is);
- # Queue element structure: [reanalysis-p, token]
- push @{$this->{_queue}}, [1, pop @structure];
- }
- };
- &$undo_trailing_blanks;
- while (@structure >= 2) {
- my $something_done_p = 0;
- # FIXME: If the last token is a close tag but there are no tags
- # FIXME: before it, drop the close tag back into the queue. This
- # FIXME: is an ugly hack to get rid of "foo %s</h1>" type mess.
- if (@structure >= 2
- && $structure[$#structure]->type == TmplTokenType::TAG
- && $structure[$#structure]->string =~ /^<\//s) {
- my $has_other_tags_p = 0;
- for (my $i = 0; $i < $#structure; $i += 1) {
- $has_other_tags_p = 1
- if $structure[$i]->type == TmplTokenType::TAG;
- last if $has_other_tags_p;
- }
- if (!$has_other_tags_p) {
- push @{$this->{_queue}}, [0, pop @structure]
- &$undo_trailing_blanks;
- $something_done_p = 1;
- }
- }
- # FIXME: Do the same ugly hack for the last token being a ( or [
- if (@structure >= 2
- && $structure[$#structure]->type == TmplTokenType::TEXT
- && $structure[$#structure]->string =~ /^[\(\[]$/) { # not )]
- push @{$this->{_queue}}, [1, pop @structure];
- &$undo_trailing_blanks;
- $something_done_p = 1;
- }
- # FIXME: If the first token is an open tag, but there is no
- # FIXME: corresponding close tag, "drop the open tag", i.e.,
- # FIXME: requeue everything for reanalysis, except the frist tag. :-(
- if (@structure >= 2
- && $structure[0]->type == TmplTokenType::TAG
- && $structure[0]->string =~ /^<([a-z0-9]+)/is
- && (my $tag = $1) !~ /^(?:br|hr|img|input)\b/is
- ) {
- my $tag_open_count = 1;
- for (my $i = 1; $i <= $#structure; $i += 1) {
- if ($structure[$i]->type == TmplTokenType::TAG) {
- if ($structure[$i]->string =~ /^<(\/?)$tag\b/is) {
- $tag_open_count += ($1? -1: +1);
- }
- }
- }
- if ($tag_open_count > 0) {
- for (my $i = $#structure; $i; $i -= 1) {
- push @{$this->{_queue}}, [1, pop @structure];
- }
- $something_done_p = 1;
- }
- }
- # FIXME: If the first token is an open tag, the last token is the
- # FIXME: corresponding close tag, and there are no other close tags
- # FIXME: inbetween, requeue the tokens from the second token on,
- # FIXME: flagged as ok for re-analysis
- if (@structure >= 3
- && $structure[0]->type == TmplTokenType::TAG
- && $structure[0]->string =~ /^<([a-z0-9]+)/is && (my $tag = $1)
- && $structure[$#structure]->type == TmplTokenType::TAG
- && $structure[$#structure]->string =~ /^<\/$1\s*>$/is) {
- my $has_other_open_or_close_tags_p = 0;
- for (my $i = 1; $i < $#structure; $i += 1) {
- $has_other_open_or_close_tags_p = 1
- if $structure[$i]->type == TmplTokenType::TAG
- && $structure[$i]->string =~ /^<\/?$tag\b/is;
- last if $has_other_open_or_close_tags_p;
- }
- if (!$has_other_open_or_close_tags_p) {
- for (my $i = $#structure; $i; $i -= 1) {
- push @{$this->{_queue}}, [1, pop @structure];
- }
- $something_done_p = 1;
- }
- }
- last if !$something_done_p;
- }
- return @structure;
-}
-
-sub looks_plausibly_like_groupable_text_p (@) {
- my @structure = @_;
- # The text would look plausibly groupable if all open tags are also closed.
- my @tags = ();
- my $error_p = 0;
- for (my $i = 0; $i <= $#structure; $i += 1) {
- if ($structure[$i]->type == TmplTokenType::TAG) {
- my $form = $structure[$i]->string;
- if ($form =~ /^<([A-Z0-9]+)/is) {
- my $tag = lc($1);
- if ($tag !~ /^(?:br|input)$/is && $form !~ /\/>$/is) {
- push @tags, $tag;
- }
- } elsif ($form =~ /^<\/([A-Z0-9]+)/is) {
- if (@tags && lc($1) eq $tags[$#tags]) {
- pop @tags;
- } else {
- $error_p = 1;
- }
- }
- } elsif ($structure[$i]->type != TmplTokenType::TEXT) {
- $error_p = 1;
- }
- last if $error_p;
- }
- return !$error_p && !@tags;