package Text::Xslate::Parser; use Mouse; use Scalar::Util (); use Text::Xslate::Symbol; use Text::Xslate::Util qw( $DEBUG $STRING $NUMBER is_int any_in neat literal_to_value make_error p ); use constant _DUMP_PROTO => scalar($DEBUG =~ /\b dump=proto \b/xmsi); use constant _DUMP_TOKEN => scalar($DEBUG =~ /\b dump=token \b/xmsi); our @CARP_NOT = qw(Text::Xslate::Compiler Text::Xslate::Symbol); my $CODE = qr/ (?: $STRING | [^'"] ) /xms; my $COMMENT = qr/\# [^\n;]* (?= [;\n] | \z)/xms; # Operator tokens that the parser recognizes. # All the single characters are tokenized as an operator. my $OPERATOR_TOKEN = sprintf '(?:%s|[^ \t\r\n])', join('|', map{ quotemeta } qw( ... .. == != <=> <= >= << >> += -= *= /= %= ~= &&= ||= //= ~~ =~ && || // -> => :: ++ -- +| +& +^ +< +> +~ ), ','); my %shortcut_table = ( '=' => 'print', ); my $CHOMP_FLAGS = qr/-/xms; has identity_pattern => ( is => 'ro', isa => 'RegexpRef', builder => '_build_identity_pattern', init_arg => undef, ); sub _build_identity_pattern { return qr/(?: (?:[A-Za-z_]|\$\~?) [A-Za-z0-9_]* )/xms; } has [qw(compiler engine)] => ( is => 'rw', required => 0, weak_ref => 1, ); has symbol_table => ( # the global symbol table is => 'ro', isa => 'HashRef', default => sub{ {} }, init_arg => undef, ); has iterator_element => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_iterator_element', init_arg => undef, ); has scope => ( is => 'rw', isa => 'ArrayRef[HashRef]', clearer => 'init_scope', lazy => 1, default => sub{ [ {} ] }, init_arg => undef, ); has token => ( is => 'rw', isa => 'Maybe[Object]', init_arg => undef, ); has next_token => ( # to peek the next token is => 'rw', isa => 'Maybe[ArrayRef]', init_arg => undef, ); has statement_is_finished => ( is => 'rw', isa => 'Bool', init_arg => undef, ); has following_newline => ( is => 'rw', isa => 'Int', default => 0, init_arg => undef, ); has input => ( is => 'rw', isa => 'Str', init_arg => undef, ); has line_start => ( is => 'ro', isa => 'Maybe[Str]', builder => '_build_line_start', ); sub _build_line_start { ':' } has tag_start => ( is => 'ro', isa => 'Str', builder => '_build_tag_start', ); sub _build_tag_start { '<:' } has tag_end => ( is => 'ro', isa => 'Str', builder => '_build_tag_end', ); sub _build_tag_end { ':>' } has comment_pattern => ( is => 'ro', isa => 'RegexpRef', builder => '_build_comment_pattern', ); sub _build_comment_pattern { $COMMENT } has shortcut_table => ( is => 'ro', isa => 'HashRef[Str]', builder => '_build_shortcut_table', ); sub _build_shortcut_table { \%shortcut_table } has in_given => ( is => 'rw', isa => 'Bool', init_arg => undef, ); # attributes for error messages has near_token => ( is => 'rw', init_arg => undef, ); has file => ( is => 'rw', required => 0, ); has line => ( is => 'rw', required => 0, ); has input_layer => ( is => 'ro', default => ':utf8', ); sub symbol_class() { 'Text::Xslate::Symbol' } # the entry point sub parse { my($parser, $input, %args) = @_; local $parser->{file} = $args{file} || \$input; local $parser->{line} = $args{line} || 1; local $parser->{in_given} = 0; local $parser->{scope} = [ map { +{ %{$_} } } @{ $parser->scope } ]; local $parser->{symbol_table} = { %{ $parser->symbol_table } }; local $parser->{near_token}; local $parser->{next_token}; local $parser->{token}; local $parser->{input}; $parser->input( $parser->preprocess($input) ); $parser->next_token( $parser->tokenize() ); $parser->advance(); my $ast = $parser->statements(); if(my $input_pos = pos $parser->{input}) { if($input_pos != length($parser->{input})) { $parser->_error("Syntax error", $parser->token); } } return $ast; } sub trim_code { my($parser, $s) = @_; $s =~ s/\A [ \t]+ //xms; $s =~ s/ [ \t]+ \n?\z//xms; return $s; } sub auto_chomp { my($parser, $tokens_ref, $i, $s_ref) = @_; my $p; my $nl = 0; # postchomp if($i >= 1 and ($p = $tokens_ref->[$i-1])->[0] eq 'postchomp') { # [ CODE ][*][ TEXT ] # <: ... -:> \nfoobar # ^^^^ ${$s_ref} =~ s/\A [ \t]* (\n)//xms; if($1) { $nl++; } } # prechomp if(($i+1) < @{$tokens_ref} and ($p = $tokens_ref->[$i+1])->[0] eq 'prechomp') { if(${$s_ref} !~ / [^ \t] /xms) { # HERE # [ TEXT ][*][ CODE ] # <:- ... :> # ^^^^^^^^ ${$s_ref} = ''; } else { # HERE # [ TEXT ][*][ CODE ] # \n<:- ... :> # ^^ $nl += chomp ${$s_ref}; } } elsif(($i+2) < @{$tokens_ref} and ($p = $tokens_ref->[$i+2])->[0] eq 'prechomp' and ($p = $tokens_ref->[$i+1])->[0] eq 'text' and $p->[1] !~ / [^ \t] /xms) { # HERE # [ TEXT ][ TEXT ][*][ CODE ] # \n <:- ... :> # ^^^^^^^^^^ $p->[1] = ''; $nl += (${$s_ref} =~ s/\n\z//xms); } return $nl; } # split templates by tags before tokenizing sub split :method { my $parser = shift; local($_) = @_; my @tokens; my $line_start = $parser->line_start; my $tag_start = $parser->tag_start; my $tag_end = $parser->tag_end; my $lex_line_code = defined($line_start) && qr/\A ^ [ \t]* \Q$line_start\E ([^\n]* \n?) /xms; my $lex_tag_start = qr/\A \Q$tag_start\E ($CHOMP_FLAGS?)/xms; # 'text' is a something without newlines # following a newline, $tag_start, or end of the input my $lex_text = qr/\A ( [^\n]*? (?: \n | (?= \Q$tag_start\E ) | \z ) ) /xms; my $lex_comment = $parser->comment_pattern; my $lex_code = qr/(?: $lex_comment | $CODE )/xms; my $in_tag = 0; while($_ ne '') { if($in_tag) { my $start = 0; my $pos; while( ($pos = index $_, $tag_end, $start) >= 0 ) { my $code = substr $_, 0, $pos; $code =~ s/$lex_code//xmsg; if(length($code) == 0) { last; } $start = $pos + 1; } if($pos >= 0) { my $code = substr $_, 0, $pos, ''; $code =~ s/($CHOMP_FLAGS?) \z//xmso; my $chomp = $1; s/\A \Q$tag_end\E //xms or die "Oops!"; push @tokens, [ code => $code ]; if($chomp) { push @tokens, [ postchomp => $chomp ]; } $in_tag = 0; } else { last; # the end tag is not found } } # not $in_tag elsif($lex_line_code && (@tokens == 0 || $tokens[-1][1] =~ /\n\z/xms) && s/$lex_line_code//xms) { push @tokens, [ code => $1 ]; } elsif(s/$lex_tag_start//xms) { $in_tag = 1; my $chomp = $1; if($chomp) { push @tokens, [ prechomp => $chomp ]; } } elsif(s/$lex_text//xms) { push @tokens, [ text => $1 ]; } else { confess "Oops: Unreached code, near" . p($_); } } if($in_tag) { # calculate line number my $orig_src = $_[0]; substr $orig_src, -length($_), length($_), ''; my $line = ($orig_src =~ tr/\n/\n/); $parser->_error("Malformed templates detected", neat((split /\n/, $_)[0]), ++$line, ); } #p(\@tokens); return \@tokens; } sub preprocess { my($parser, $input) = @_; # tokenization my $tokens_ref = $parser->split($input); my $code = ''; my $shortcut_table = $parser->shortcut_table; my $shortcut = join('|', map{ quotemeta } keys %shortcut_table); my $shortcut_rx = qr/\A ($shortcut)/xms; for(my $i = 0; $i < @{$tokens_ref}; $i++) { my($type, $s) = @{ $tokens_ref->[$i] }; if($type eq 'text') { my $nl = $parser->auto_chomp($tokens_ref, $i, \$s); $s =~ s/(["\\])/\\$1/gxms; # " for poor editors # $s may have single new line $nl += ($s =~ s/\n/\\n/xms); $code .= qq{print_raw "$s";}; # must set even if $s is empty $code .= qq{\n} if $nl > 0; } elsif($type eq 'code') { # shortcut commands $s =~ s/$shortcut_rx/$shortcut_table->{$1}/xms if $shortcut; $s = $parser->trim_code($s); if($s =~ /\A \s* [}] \s* \z/xms){ $code .= $s; } elsif($s =~ s/\n\z//xms) { $code .= qq{$s\n}; } else { $code .= qq{$s;}; # auto semicolon insertion } } elsif($type eq 'prechomp') { # noop, just a marker } elsif($type eq 'postchomp') { # noop, just a marker } else { $parser->_error("Oops: Unknown token: $s ($type)"); } } print STDOUT $code, "\n" if _DUMP_PROTO; return $code; } sub BUILD { my($parser) = @_; $parser->_init_basic_symbols(); $parser->init_symbols(); return; } # The grammer sub _init_basic_symbols { my($parser) = @_; $parser->symbol('(end)')->is_block_end(1); # EOF # prototypes of value symbols foreach my $type (qw(name variable literal)) { my $s = $parser->symbol("($type)"); $s->arity($type); $s->set_nud( $parser->can("nud_$type") ); } # common separators $parser->symbol(';')->set_nud(\&nud_separator); $parser->define_pair('(' => ')'); $parser->define_pair('{' => '}'); $parser->define_pair('[' => ']'); $parser->symbol(',') ->is_comma(1); $parser->symbol('=>') ->is_comma(1); # common commands $parser->symbol('print') ->set_std(\&std_print); $parser->symbol('print_raw')->set_std(\&std_print); # special literals $parser->define_literal(nil => undef); $parser->define_literal(true => 1); $parser->define_literal(false => 0); # special tokens $parser->symbol('__FILE__')->set_nud(\&nud_current_file); $parser->symbol('__LINE__')->set_nud(\&nud_current_line); $parser->symbol('__ROOT__')->set_nud(\&nud_current_vars); return; } sub init_basic_operators { my($parser) = @_; # define operator precedence $parser->prefix('{', 256, \&nud_brace); $parser->prefix('[', 256, \&nud_brace); $parser->infix('(', 256, \&led_call); $parser->infix('.', 256, \&led_dot); $parser->infix('[', 256, \&led_fetch); $parser->prefix('(', 256, \&nud_paren); $parser->prefix('!', 200)->is_logical(1); $parser->prefix('+', 200); $parser->prefix('-', 200); $parser->prefix('+^', 200); # numeric bitwise negate $parser->infix('*', 190); $parser->infix('/', 190); $parser->infix('%', 190); $parser->infix('x', 190); $parser->infix('+&', 190); # numeric bitwise and $parser->infix('+', 180); $parser->infix('-', 180); $parser->infix('~', 180); # connect $parser->infix('+|', 180); # numeric bitwise or $parser->infix('+^', 180); # numeric bitwise xor $parser->prefix('defined', 170, \&nud_defined); # named unary operator $parser->infix('<', 160)->is_logical(1); $parser->infix('<=', 160)->is_logical(1); $parser->infix('>', 160)->is_logical(1); $parser->infix('>=', 160)->is_logical(1); $parser->infix('==', 150)->is_logical(1); $parser->infix('!=', 150)->is_logical(1); $parser->infix('<=>', 150); $parser->infix('cmp', 150); $parser->infix('~~', 150); $parser->infix('|', 140, \&led_pipe); $parser->infix('&&', 130)->is_logical(1); $parser->infix('||', 120)->is_logical(1); $parser->infix('//', 120)->is_logical(1); $parser->infix('min', 120); $parser->infix('max', 120); $parser->infix('..', 110, \&led_range); $parser->symbol(':'); $parser->infixr('?', 100, \&led_ternary); $parser->assignment('=', 90); $parser->assignment('+=', 90); $parser->assignment('-=', 90); $parser->assignment('*=', 90); $parser->assignment('/=', 90); $parser->assignment('%=', 90); $parser->assignment('~=', 90); $parser->assignment('&&=', 90); $parser->assignment('||=', 90); $parser->assignment('//=', 90); $parser->make_alias('!' => 'not')->ubp(70); $parser->make_alias('&&' => 'and')->lbp(60); $parser->make_alias('||' => 'or') ->lbp(50); return; } sub init_symbols { my($parser) = @_; my $s; # syntax specific separators $parser->symbol('{'); $parser->symbol('}')->is_block_end(1); # block end $parser->symbol('->'); $parser->symbol('else'); $parser->symbol('with'); $parser->symbol('::'); # operators $parser->init_basic_operators(); # statements $s = $parser->symbol('if'); $s->set_std(\&std_if); $s->can_be_modifier(1); $parser->symbol('for') ->set_std(\&std_for); $parser->symbol('while' ) ->set_std(\&std_while); $parser->symbol('given') ->set_std(\&std_given); $parser->symbol('when') ->set_std(\&std_when); $parser->symbol('default') ->set_std(\&std_when); $parser->symbol('include') ->set_std(\&std_include); $parser->symbol('last') ->set_std(\&std_last); $parser->symbol('next') ->set_std(\&std_next); # macros $parser->symbol('cascade') ->set_std(\&std_cascade); $parser->symbol('macro') ->set_std(\&std_proc); $parser->symbol('around') ->set_std(\&std_proc); $parser->symbol('before') ->set_std(\&std_proc); $parser->symbol('after') ->set_std(\&std_proc); $parser->symbol('block') ->set_std(\&std_macro_block); $parser->symbol('super') ->set_std(\&std_super); $parser->symbol('override') ->set_std(\&std_override); $parser->symbol('->') ->set_nud(\&nud_lambda); # lexical variables/constants stuff $parser->symbol('constant')->set_nud(\&nud_constant); $parser->symbol('my' )->set_nud(\&nud_constant); return; } sub _build_iterator_element { return { index => \&iterator_index, count => \&iterator_count, is_first => \&iterator_is_first, is_last => \&iterator_is_last, body => \&iterator_body, size => \&iterator_size, max_index => \&iterator_max_index, peek_next => \&iterator_peek_next, peek_prev => \&iterator_peek_prev, cycle => \&iterator_cycle, }; } sub symbol { my($parser, $id, $lbp) = @_; my $stash = $parser->symbol_table; my $s = $stash->{$id}; if(defined $s) { if(defined $lbp) { $s->lbp($lbp); } } else { # create a new symbol $s = $parser->symbol_class->new(id => $id, lbp => $lbp || 0); $stash->{$id} = $s; } return $s; } sub define_pair { my($parser, $left, $right) = @_; $parser->symbol($left) ->counterpart($right); $parser->symbol($right)->counterpart($left); return; } # the low-level tokenizer. Don't use it directly, use advance() instead. sub tokenize { my($parser) = @_; local *_ = \$parser->{input}; my $comment_rx = $parser->comment_pattern; my $id_rx = $parser->identity_pattern; my $count = 0; TRY: { /\G (\s*) /xmsgc; $count += ( $1 =~ tr/\n/\n/); $parser->following_newline( $count ); if(/\G $comment_rx /xmsgc) { redo TRY; # retry } elsif(/\G ($id_rx)/xmsgc){ return [ name => $1 ]; } elsif(/\G ($NUMBER | $STRING)/xmsogc){ return [ literal => $1 ]; } elsif(/\G ($OPERATOR_TOKEN)/xmsogc){ return [ operator => $1 ]; } elsif(/\G (\S+)/xmsgc) { Carp::confess("Oops: Unexpected token '$1'"); } else { # empty return [ special => '(end)' ]; } } } sub next_token_is { my($parser, $token) = @_; return $parser->next_token->[1] eq $token; } # the high-level tokenizer sub advance { my($parser, $expect) = @_; my $t = $parser->token; if(defined($expect) && $t->id ne $expect) { $parser->_unexpected(neat($expect), $t); } $parser->near_token($t); my $stash = $parser->symbol_table; $t = $parser->next_token; if($t->[0] eq 'special') { return $parser->token( $stash->{ $t->[1] } ); } $parser->statement_is_finished( $parser->following_newline != 0 ); my $line = $parser->line( $parser->line + $parser->following_newline ); $parser->next_token( $parser->tokenize() ); my($arity, $id) = @{$t}; if( $arity eq "name" && $parser->next_token_is("=>") ) { $arity = "literal"; } print STDOUT "[$arity => $id] #$line\n" if _DUMP_TOKEN; my $symbol; if($arity eq "literal") { $symbol = $parser->symbol('(literal)')->clone( id => $id, value => $parser->parse_literal($id) ); } elsif($arity eq "operator") { $symbol = $stash->{$id}; if(not defined $symbol) { $parser->_error("Unknown operator '$id'"); } $symbol = $symbol->clone( arity => $arity, # to make error messages clearer ); } else { # name # find_or_create() returns a cloned symbol, # so there's not need to clone() here $symbol = $parser->find_or_create($id); } $symbol->line($line); return $parser->token($symbol); } sub parse_literal { my($parser, $literal) = @_; return literal_to_value($literal); } sub nud_name { my($parser, $symbol) = @_; return $symbol->clone( arity => 'name', ); } sub nud_variable { my($parser, $symbol) = @_; return $symbol->clone( arity => 'variable', ); } sub nud_literal { my($parser, $symbol) = @_; return $symbol->clone( arity => 'literal', ); } sub default_nud { my($parser, $symbol) = @_; return $symbol->clone(); # as is } sub default_led { my($parser, $symbol) = @_; $parser->near_token($parser->token); $parser->_error( sprintf 'Missing operator (%s): %s', $symbol->arity, $symbol->id); } sub default_std { my($parser, $symbol) = @_; $parser->near_token($parser->token); $parser->_error( sprintf 'Not a statement (%s): %s', $symbol->arity, $symbol->id); } sub expression { my($parser, $rbp) = @_; my $t = $parser->token; $parser->advance(); my $left = $t->nud($parser); while($rbp < $parser->token->lbp) { $t = $parser->token; $parser->advance(); $left = $t->led($parser, $left); } return $left; } sub expression_list { my($parser) = @_; my @list; while(1) { if($parser->token->is_value) { push @list, $parser->expression(0); } if(!$parser->token->is_comma) { last; } $parser->advance(); # comma } return \@list; } # for left associative infix operators sub led_infix { my($parser, $symbol, $left) = @_; return $parser->binary( $symbol, $left, $parser->expression($symbol->lbp) ); } sub infix { my($parser, $id, $bp, $led) = @_; my $symbol = $parser->symbol($id, $bp); $symbol->set_led($led || \&led_infix); return $symbol; } # for right associative infix operators sub led_infixr { my($parser, $symbol, $left) = @_; return $parser->binary( $symbol, $left, $parser->expression($symbol->lbp - 1) ); } sub infixr { my($parser, $id, $bp, $led) = @_; my $symbol = $parser->symbol($id, $bp); $symbol->set_led($led || \&led_infixr); return $symbol; } # for prefix operators sub prefix { my($parser, $id, $bp, $nud) = @_; my $symbol = $parser->symbol($id); $symbol->ubp($bp); $symbol->set_nud($nud || \&nud_prefix); return $symbol; } sub nud_prefix { my($parser, $symbol) = @_; my $un = $symbol->clone(arity => 'unary'); $parser->reserve($un); $un->first($parser->expression($symbol->ubp)); return $un; } sub led_assignment { my($parser, $symbol, $left) = @_; $parser->_error("Assignment ($symbol) is forbidden", $left); } sub assignment { my($parser, $id, $bp) = @_; $parser->symbol($id, $bp)->set_led(\&led_assignment); return; } # the ternary is a right associative operator sub led_ternary { my($parser, $symbol, $left) = @_; my $if = $symbol->clone(arity => 'if'); $if->first($left); $if->second([$parser->expression( $symbol->lbp - 1 )]); $parser->advance(":"); $if->third([$parser->expression( $symbol->lbp - 1 )]); return $if; } sub is_valid_field { my($parser, $token) = @_; my $arity = $token->arity; if($arity eq "name") { return 1; } elsif($arity eq "literal") { return is_int($token->id); } return 0; } sub led_dot { my($parser, $symbol, $left) = @_; my $t = $parser->token; if(!$parser->is_valid_field($t)) { $parser->_unexpected("a field name", $t); } my $dot = $symbol->clone( arity => "field", first => $left, second => $t->clone(arity => 'literal'), ); $t = $parser->advance(); if($t->id eq "(") { $parser->advance(); # "(" $dot->third( $parser->expression_list() ); $parser->advance(")"); $dot->arity("methodcall"); } return $dot; } sub led_fetch { # $h[$field] my($parser, $symbol, $left) = @_; my $fetch = $symbol->clone( arity => "field", first => $left, second => $parser->expression(0), ); $parser->advance("]"); return $fetch; } sub call { my($parser, $function, @args) = @_; if(not ref $function) { $function = $parser->symbol('(name)')->clone( arity => 'name', id => $function, line => $parser->line, ); } return $parser->symbol('(call)')->clone( arity => 'call', first => $function, second => \@args, ); } sub led_call { my($parser, $symbol, $left) = @_; my $call = $symbol->clone(arity => 'call'); $call->first($left); $call->second( $parser->expression_list() ); $parser->advance(")"); return $call; } sub led_pipe { # filter my($parser, $symbol, $left) = @_; # a | b -> b(a) return $parser->call($parser->expression($symbol->lbp), $left); } sub led_range { # x .. y my($parser, $symbol, $left) = @_; return $symbol->clone( arity => 'range', first => $left, second => $parser->expression(0), ); } sub nil { my($parser) = @_; return $parser->symbol('nil')->nud($parser); } sub nud_defined { my($parser, $symbol) = @_; $parser->reserve( $symbol->clone() ); # prefix: is a syntactic sugar to $a != nil return $parser->binary( '!=', $parser->expression($symbol->ubp), $parser->nil, ); } # for special literals (e.g. nil, true, false) sub nud_special { my($parser, $symbol) = @_; return $symbol->first; } sub define_literal { # special literals my($parser, $id, $value) = @_; my $symbol = $parser->symbol($id); $symbol->first( $symbol->clone( arity => defined($value) ? 'literal' : 'nil', value => $value, ) ); $symbol->set_nud(\&nud_special); $symbol->is_defined(1); return $symbol; } sub new_scope { my($parser) = @_; push @{ $parser->scope }, {}; return; } sub pop_scope { my($parser) = @_; pop @{ $parser->scope }; return; } sub undefined_name { my($parser, $name) = @_; if($name =~ /\A \$/xms) { return $parser->symbol_table->{'(variable)'}->clone( id => $name, ); } else { return $parser->symbol_table->{'(name)'}->clone( id => $name, ); } } sub find_or_create { # find a name from all the scopes my($parser, $name) = @_; my $s; foreach my $scope(reverse @{$parser->scope}){ $s = $scope->{$name}; if(defined $s) { return $s->clone(); } } $s = $parser->symbol_table->{$name}; return defined($s) ? $s : $parser->undefined_name($name); } sub reserve { # reserve a name to the scope my($parser, $symbol) = @_; if($symbol->arity ne 'name' or $symbol->is_reserved) { return $symbol; } my $top = $parser->scope->[-1]; my $t = $top->{$symbol->id}; if($t) { if($t->is_reserved) { return $symbol; } if($t->arity eq "name") { $parser->_error("Already defined: $symbol"); } } $top->{$symbol->id} = $symbol; $symbol->is_reserved(1); #$symbol->scope($top); return $symbol; } sub define { # define a name to the scope my($parser, $symbol) = @_; my $top = $parser->scope->[-1]; my $t = $top->{$symbol->id}; if(defined $t) { $parser->_error($t->is_reserved ? "Already is_reserved: $t" : "Already defined: $t"); } $top->{$symbol->id} = $symbol; $symbol->is_defined(1); $symbol->is_reserved(0); $symbol->remove_nud(); $symbol->remove_led(); $symbol->remove_std(); $symbol->lbp(0); #$symbol->scope($top); return $symbol; } sub print { my($parser, @args) = @_; return $parser->symbol('print')->clone( arity => 'print', first => \@args, line => $parser->line, ); } sub binary { my($parser, $symbol, $lhs, $rhs) = @_; if(!ref $symbol) { # operator $symbol = $parser->symbol($symbol); } if(!ref $lhs) { # literal $lhs = $parser->symbol('(literal)')->clone( id => $lhs, ); } if(!ref $rhs) { # literal $rhs = $parser->symbol('(literal)')->clone( id => $rhs, ); } return $symbol->clone( arity => 'binary', first => $lhs, second => $rhs, ); } sub define_function { my($parser, @names) = @_; foreach my $name(@names) { my $s = $parser->symbol($name); $s->set_nud(\&nud_name); $s->is_defined(1); } return; } sub finish_statement { my($parser, $expr) = @_; my $t = $parser->token; if($t->can_be_modifier) { $parser->advance(); $expr = $t->std($parser, $expr); $t = $parser->token; } if($t->is_block_end or $parser->statement_is_finished) { # noop } elsif($t->id eq ";") { $parser->advance(); } else { $parser->_unexpected("a semicolon or block end", $t); } return $expr; } sub statement { # process one or more statements my($parser) = @_; my $t = $parser->token; if($t->id eq ";"){ $parser->advance(); # ";" return; } if($t->has_std) { # is $t a statement? $parser->reserve($t); $parser->advance(); # std() can return a list of nodes return $t->std($parser); } my $expr = $parser->auto_command( $parser->expression(0) ); return $parser->finish_statement($expr); } sub auto_command { my($parser, $expr) = @_; if($expr->is_statement) { # expressions can produce pure statements (e.g. assignment ) return $expr; } else { return $parser->print($expr); } } sub statements { # process statements my($parser) = @_; my @a; for(my $t = $parser->token; !$t->is_block_end; $t = $parser->token) { push @a, $parser->statement(); } return \@a; } sub block { my($parser) = @_; $parser->new_scope(); $parser->advance("{"); my $a = $parser->statements(); $parser->advance("}"); $parser->pop_scope(); return $a; } sub nud_paren { my($parser, $symbol) = @_; my $expr = $parser->expression(0); $parser->advance( $symbol->counterpart ); return $expr; } # for object literals sub nud_brace { my($parser, $symbol) = @_; my $list = $parser->expression_list(); $parser->advance($symbol->counterpart); return $symbol->clone( arity => 'composer', first => $list, ); } # iterator variables ($~iterator) # $~iterator . NAME | NAME() sub nud_iterator { my($parser, $symbol) = @_; my $iterator = $symbol->clone(); if($parser->token->id eq ".") { $parser->advance(); my $t = $parser->token; if(!any_in($t->arity, qw(variable name))) { $parser->_unexpected("a field name", $t); } my $generator = $parser->iterator_element->{$t->value}; if(!$generator) { $parser->_error("Undefined iterator element: $t"); } $parser->advance(); # element name my $args; if($parser->token->id eq "(") { $parser->advance(); $args = $parser->expression_list(); $parser->advance(")"); } $iterator->second($t); return $generator->($parser, $iterator, @{$args}); } return $iterator; } sub nud_constant { my($parser, $symbol) = @_; my $t = $parser->token; my $expect = $symbol->id eq 'constant' ? 'name' : $symbol->id eq 'my' ? 'variable' : die "Oops: $symbol"; if($t->arity ne $expect) { $parser->_unexpected("a $expect", $t); } $parser->define($t)->arity("name"); $parser->advance(); $parser->advance("="); return $symbol->clone( arity => 'constant', first => $t, second => $parser->expression(0), is_statement => 1, ); } my $lambda_id = 0; sub lambda { my($parser, $proto) = @_; my $name = $parser->symbol('(name)')->clone( id => sprintf('lambda@%s:%d', $parser->file, $lambda_id++), ); return $parser->symbol('(name)')->clone( arity => 'proc', id => 'macro', first => $name, line => $proto->line, ); } # -> $x { ... } sub nud_lambda { my($parser, $symbol) = @_; my $pointy = $parser->lambda($symbol); $parser->new_scope(); my @params; if($parser->token->id ne "{") { # has params my $paren = ($parser->token->id eq "("); $parser->advance("(") if $paren; # optional my $t = $parser->token; while($t->arity eq "variable") { push @params, $t; $parser->define($t); $t = $parser->advance(); if($t->id eq ",") { $t = $parser->advance(); # "," } else { last; } } $parser->advance(")") if $paren; } $pointy->second( \@params ); $parser->advance("{"); $pointy->third($parser->statements()); $parser->advance("}"); $parser->pop_scope(); return $symbol->clone( arity => 'lambda', first => $pointy, ); } sub nud_current_file { my($self, $symbol) = @_; my $file = $self->file; return $symbol->clone( arity => 'literal', value => ref($file) ? '' : $file, ); } sub nud_current_line { my($self, $symbol) = @_; return $symbol->clone( arity => 'literal', value => $symbol->line, ); } sub nud_current_vars { my($self, $symbol) = @_; return $symbol->clone( arity => 'vars', ); } sub nud_separator { my($self, $symbol) = @_; $self->_error("Invalid expression found", $symbol); } # -> VARS { STATEMENTS } # -> { STATEMENTS } # { STATEMENTS } sub pointy { my($parser, $pointy, $in_for) = @_; my @params; $parser->new_scope(); if($parser->token->id eq "->") { $parser->advance(); if($parser->token->id ne "{") { my $paren = ($parser->token->id eq "("); $parser->advance("(") if $paren; my $t = $parser->token; while($t->arity eq "variable") { push @params, $t; $parser->define($t); if($in_for) { $parser->define_iterator($t); } $t = $parser->advance(); if($t->id eq ",") { $t = $parser->advance(); # "," } else { last; } } $parser->advance(")") if $paren; } } $pointy->second( \@params ); $parser->advance("{"); $pointy->third($parser->statements()); $parser->advance("}"); $parser->pop_scope(); return; } sub iterator_name { my($parser, $var) = @_; # $foo -> $~foo (my $it_name = $var->id) =~ s/\A (\$?) /${1}~/xms; return $it_name; } sub define_iterator { my($parser, $var) = @_; my $it = $parser->symbol( $parser->iterator_name($var) )->clone( arity => 'iterator', first => $var, ); $parser->define($it); $it->set_nud(\&nud_iterator); return $it; } sub std_for { my($parser, $symbol) = @_; my $proc = $symbol->clone(arity => 'for'); $proc->first( $parser->expression(0) ); $parser->pointy($proc, 1); # for-else support if($parser->token eq 'else') { $parser->advance(); my $else = $parser->block(); $proc = $symbol->clone( arity => 'for_else', first => $proc, second => $else, ) } return $proc; } sub std_while { my($parser, $symbol) = @_; my $proc = $symbol->clone(arity => 'while'); $proc->first( $parser->expression(0) ); $parser->pointy($proc); return $proc; } # macro name -> { ... } sub std_proc { my($parser, $symbol) = @_; my $macro = $symbol->clone(arity => "proc"); my $name = $parser->token; if($name->arity ne "name") { $parser->_unexpected("a name", $name); } $parser->define_function($name->id); $macro->first($name); $parser->advance(); $parser->pointy($macro); return $macro; } # block name -> { ... } # block name | filter -> { ... } sub std_macro_block { my($parser, $symbol) = @_; my $macro = $symbol->clone(arity => "proc"); my $name = $parser->token; if($name->arity ne "name") { $parser->_unexpected("a name", $name); } # auto filters my @filters; my $t = $parser->advance(); while($t->id eq "|") { $t = $parser->advance(); if($t->arity ne "name") { $parser->_unexpected("a name", $name); } my $filter = $t->clone(); $t = $parser->advance(); my $args; if($t->id eq "(") { $parser->advance(); $args = $parser->expression_list(); $t = $parser->advance(")"); } push @filters, $args ? $parser->call($filter, @{$args}) : $filter; } $parser->define_function($name->id); $macro->first($name); $parser->pointy($macro); my $call = $parser->call($macro->first); if(@filters) { foreach my $filter(@filters) { # apply filters $call = $parser->call($filter, $call); } } # std() can return a list return( $macro, $parser->print($call) ); } sub std_override { # synonym to 'around' my($parser, $symbol) = @_; return $parser->std_proc($symbol->clone(id => 'around')); } sub std_if { my($parser, $symbol, $expr) = @_; my $if = $symbol->clone(arity => "if"); $if->first( $parser->expression(0) ); if(defined $expr) { # statement modifier $if->second([$expr]); return $if; } $if->second( $parser->block() ); my $top_if = $if; my $t = $parser->token; while($t->id eq "elsif") { $parser->reserve($t); $parser->advance(); # "elsif" my $elsif = $t->clone(arity => "if"); $elsif->first( $parser->expression(0) ); $elsif->second( $parser->block() ); $if->third([$elsif]); $if = $elsif; $t = $parser->token; } if($t->id eq "else") { $parser->reserve($t); $t = $parser->advance(); # "else" $if->third( $t->id eq "if" ? [$parser->statement()] : $parser->block()); } return $top_if; } sub std_given { my($parser, $symbol) = @_; my $given = $symbol->clone(arity => 'given'); $given->first( $parser->expression(0) ); local $parser->{in_given} = 1; $parser->pointy($given); if(!(defined $given->second && @{$given->second})) { # if no topic vars $given->second([ $parser->symbol('($_)')->clone(arity => 'variable' ) ]); } $parser->build_given_body($given, "when"); return $given; } # when/default sub std_when { my($parser, $symbol) = @_; if(!$parser->in_given) { $parser->_error("You cannot use $symbol blocks outside given blocks"); } my $proc = $symbol->clone(arity => 'when'); if($symbol->id eq "when") { $proc->first( $parser->expression(0) ); } $proc->second( $parser->block() ); return $proc; } sub _only_white_spaces { my($s) = @_; return $s->arity eq "literal" && $s->value =~ m{\A [ \t\r\n]* \z}xms } sub build_given_body { my($parser, $given, $expect) = @_; my($topic) = @{$given->second}; # make if-elsif-else chain from given-when my $if; my $elsif; my $else; foreach my $when(@{$given->third}) { if($when->arity ne $expect) { # ignore white space if($when->id eq "print_raw" && !grep { !_only_white_spaces($_) } @{$when->first}) { next; } $parser->_unexpected("$expect blocks", $when); } $when->arity("if"); # change the arity if(defined(my $test = $when->first)) { # when if(!$test->is_logical) { $when->first( $parser->binary('~~', $topic, $test) ); } } else { # default $when->first( $parser->symbol('true')->nud($parser) ); $else = $when; next; } if(!defined $if) { $if = $when; $elsif = $when; } else { $elsif->third([$when]); $elsif = $when; } } if(defined $else) { # default if(defined $elsif) { $elsif->third([$else]); } else { $if = $else; # only default } } $given->third(defined $if ? [$if] : undef); return; } sub std_include { my($parser, $symbol) = @_; my $arg = $parser->barename(); my $vars = $parser->localize_vars(); my $stmt = $symbol->clone( first => $arg, second => $vars, arity => 'include', ); return $parser->finish_statement($stmt); } sub std_print { my($parser, $symbol) = @_; my $args; if($parser->token->id ne ";") { $args = $parser->expression_list(); } my $stmt = $symbol->clone( arity => 'print', first => $args, ); return $parser->finish_statement($stmt); } # for cascade() and include() sub barename { my($parser) = @_; my $t = $parser->token; if($t->arity ne 'name' or $t->is_defined) { # string literal for 'cascade', or any expression for 'include' return $parser->expression(0); } # path::to::name my @parts; push @parts, $t; $parser->advance(); while(1) { my $t = $parser->token; if($t->id eq "::") { $t = $parser->advance(); # "::" if($t->arity ne "name") { $parser->_unexpected("a name", $t); } push @parts, $t; $parser->advance(); } else { last; } } return \@parts; } # NOTHING | { expression-list } sub localize_vars { my($parser) = @_; if($parser->token->id eq "{") { $parser->advance(); $parser->new_scope(); my $vars = $parser->expression_list(); $parser->pop_scope(); $parser->advance("}"); return $vars; } return undef; } sub std_cascade { my($parser, $symbol) = @_; my $base; if($parser->token->id ne "with") { $base = $parser->barename(); } my $components; if($parser->token->id eq "with") { $parser->advance(); # "with" my @c = $parser->barename(); while($parser->token->id eq ",") { $parser->advance(); # "," push @c, $parser->barename(); } $components = \@c; } my $vars = $parser->localize_vars(); my $stmt = $symbol->clone( arity => 'cascade', first => $base, second => $components, third => $vars, ); return $parser->finish_statement($stmt); } sub std_super { my($parser, $symbol) = @_; my $stmt = $symbol->clone(arity => 'super'); return $parser->finish_statement($stmt); } sub std_next { my($parser, $symbol) = @_; my $stmt = $symbol->clone(arity => 'loop_control', id => 'next'); return $parser->finish_statement($stmt); } sub std_last { my($parser, $symbol) = @_; my $stmt = $symbol->clone(arity => 'loop_control', id => 'last'); return $parser->finish_statement($stmt); } # iterator elements sub bad_iterator_args { my($parser, $iterator) = @_; $parser->_error("Wrong number of arguments for $iterator." . $iterator->second); } sub iterator_index { my($parser, $iterator, @args) = @_; $parser->bad_iterator_args($iterator) if @args != 0; # $~iterator return $iterator; } sub iterator_count { my($parser, $iterator, @args) = @_; $parser->bad_iterator_args($iterator) if @args != 0; # $~iterator + 1 return $parser->binary('+', $iterator, 1); } sub iterator_is_first { my($parser, $iterator, @args) = @_; $parser->bad_iterator_args($iterator) if @args != 0; # $~iterator == 0 return $parser->binary('==', $iterator, 0); } sub iterator_is_last { my($parser, $iterator, @args) = @_; $parser->bad_iterator_args($iterator) if @args != 0; # $~iterator == $~iterator.max_index return $parser->binary('==', $iterator, $parser->iterator_max_index($iterator)); } sub iterator_body { my($parser, $iterator, @args) = @_; $parser->bad_iterator_args($iterator) if @args != 0; # $~iterator.body return $iterator->clone( arity => 'iterator_body', ); } sub iterator_size { my($parser, $iterator, @args) = @_; $parser->bad_iterator_args($iterator) if @args != 0; # $~iterator.max_index + 1 return $parser->binary('+', $parser->iterator_max_index($iterator), 1); } sub iterator_max_index { my($parser, $iterator, @args) = @_; $parser->bad_iterator_args($iterator) if @args != 0; # __builtin_max_index($~iterator.body) return $parser->symbol('max_index')->clone( arity => 'unary', first => $parser->iterator_body($iterator), ); } sub _iterator_peek { my($parser, $iterator, $pos) = @_; # $~iterator.body[ $~iterator.index + $pos ] return $parser->binary('[', $parser->iterator_body($iterator), $parser->binary('+', $parser->iterator_index($iterator), $pos), ); } sub iterator_peek_next { my($parser, $iterator, @args) = @_; $parser->bad_iterator_args($iterator) if @args != 0; return $parser->_iterator_peek($iterator, +1); } sub iterator_peek_prev { my($parser, $iterator, @args) = @_; $parser->bad_iterator_args($iterator) if @args != 0; # $~iterator.is_first ? nil : return $parser->symbol('?')->clone( arity => 'if', first => $parser->iterator_is_first($iterator), second => [$parser->nil], third => [$parser->_iterator_peek($iterator, -1)], ); } sub iterator_cycle { my($parser, $iterator, @args) = @_; $parser->bad_iterator_args($iterator) if @args < 2; # $iterator.cycle("foo", "bar", "baz") makes: # ($tmp = $~iterator % n) == 0 ? "foo" # : $tmp == 1 ? "bar" # : "baz" $parser->new_scope(); my $mod = $parser->binary('%', $iterator, scalar @args); # for the second time my $tmp = $parser->symbol('($cycle)')->clone(arity => 'name'); # for the first time my $cond = $iterator->clone( arity => 'constant', first => $tmp, second => $mod, ); my $parent = $iterator->clone( arity => 'if', first => $parser->binary('==', $cond, 0), second => [ $args[0] ], ); my $child = $parent; my $last = pop @args; for(my $i = 1; $i < @args; $i++) { my $nth = $iterator->clone( arity => 'if', id => "$iterator.cycle: $i", first => $parser->binary('==', $tmp, $i), second => [$args[$i]], ); $child->third([$nth]); $child = $nth; } $child->third([$last]); $parser->pop_scope(); return $parent; } # utils sub make_alias { # alas(from => to) my($parser, $from, $to) = @_; my $stash = $parser->symbol_table; if(exists $parser->symbol_table->{$to}) { Carp::confess( "Cannot make an alias to an existing symbol ($from => $to / " . p($parser->symbol_table->{$to}) .")"); } # make a snapshot return $stash->{$to} = $parser->symbol($from)->clone( value => $to, # real id ); } sub not_supported { my($parser, $symbol) = @_; $parser->_error("'$symbol' is not supported"); } sub _unexpected { my($parser, $expected, $got) = @_; if(defined($got) && $got ne ";") { if($got eq '(end)') { $parser->_error("Expected $expected, but reached EOF"); } else { $parser->_error("Expected $expected, but got " . neat("$got")); } } else { $parser->_error("Expected $expected"); } } sub _error { my($parser, $message, $near, $line) = @_; $near ||= $parser->near_token || ";"; if($near ne ";" && $message !~ /\b \Q$near\E \b/xms) { $message .= ", near $near"; } die $parser->make_error($message . ", while parsing templates", $parser->file, $line || $parser->line); } no Mouse; __PACKAGE__->meta->make_immutable; __END__ =head1 NAME Text::Xslate::Parser - The base class of template parsers =head1 DESCRIPTION This is a parser to build the abstract syntax tree from templates. The basis of the parser is Top Down Operator Precedence. =head1 SEE ALSO L - Top Down Operator Precedence (Douglas Crockford) L L L =cut