package Pugs::Emitter::Rule::Perl5::Ratchet;
# p6-rule perl5 emitter for ":ratchet" (non-backtracking)
# see: RuleInline.pl, RuleInline-more.pl for a program prototype
#use Smart::Comments '####';
use strict;
use warnings;
use Pugs::Emitter::Rule::Perl5::CharClass;
use Data::Dumper;
$Data::Dumper::Indent = 1;
our $direction = "+"; # XXX make lexical
our $sigspace = 0;
our $capture_count;
our $capture_to_array;
our $RegexPos;
our $count;
sub id {
if (!defined $count) {
if (defined $::PCR_SEED) {
#warn "SET SEED!!!";
srand($::PCR_SEED);
}
$count = 1000 + int(rand(1000));
}
'I' . ($count++)
}
sub call_subrule {
my ( $subrule, $tab, $positionals, @param ) = @_;
$subrule = "\$grammar->" . $subrule
unless $subrule =~ / :: | \. | -> /x;
$subrule =~ s/\./->/; # XXX - source filter
$positionals = shift @param if $positionals eq '' && @param == 1;
return
"$tab $subrule( \$s, { "
. "p => \$pos, "
. "positionals => [ $positionals ], "
. "args => {" .
join(", ",@param) .
"}, "
. "}, undef )";
}
sub quote_constant {
my $const;
if ( $_[0] eq "\\" ) {
$const = "chr(".ord("\\").")";
}
elsif ( $_[0] eq "'" ) {
$const = "chr(".ord("'").")"
}
else {
$const = "'$_[0]'"
}
return $const;
}
sub call_constant {
return " 1 # null constant\n"
unless length($_[0]);
my $const = quote_constant( $_[0] );
my $len = length( eval $const );
#print "Const: [$_[0]] $const $len \n";
return
"
$_[1] ##
$_[1] ## pos: @$RegexPos
$_[1] ( ( substr( \$s, \$pos, $len ) eq $const )
$_[1] ? ( \$pos $direction= $len or 1 )
$_[1] : 0
$_[1] )
$_[1] ## \n";
}
sub call_perl5 {
my $const = $_[0];
$_[1] = ' ' unless defined $_[1];
#print "CONST: $const - $direction \n";
return
"$_[1] ##
$_[1] ( ( substr( \$s, \$pos ) =~ m/^($const)/ )
$_[1] ? ( \$pos $direction= length( \$1 ) or 1 )
$_[1] : 0
$_[1] )
$_[1] ## \n";
}
sub emit {
my ($grammar, $ast, $param) = @_;
# runtime parameters: $grammar, $string, $state, $arg_list
# rule parameters: see Runtime::Rule.pm
local $sigspace = $param->{sigspace} ? 1 : 0; # XXX - $sigspace should be lexical
### ratchet emit sigspace: $sigspace
local $capture_count = -1;
local $capture_to_array = 0;
#print "rule: ", Dumper( $ast );
return
"##
## sigspace: $sigspace
## ratchet: 1
do { my \$rule; \$rule = sub {
my \$grammar = \$_[0];
my \$s = \$_[1];
\$_[3] = \$_[2] unless defined \$_[3]; # backwards compat
no warnings 'substr', 'uninitialized', 'syntax';
my \%pad;\n" .
#" my \$pos;\n" .
#" print \"match arg_list = \$_[1]\n\";\n" .
#" print 'match ', Dumper(\\\@_);\n" .
#" print \"match arg_list = \@{[\%{\$_[1]} ]}\n\" if defined \$_[1];\n" .
#" warn \"match pos = \", pos(\$_[1]), \"\\n\";\n" .
" my \$m;
my \$bool;
my \@pos;
# XXX :pos(X) takes the precedence over :continue ?
if (defined \$_[3]{p}) {
push \@pos, \$_[3]{p} || 0;
} elsif (\$_[3]{continue}) {
push \@pos, (pos(\$_[1]) || 0) .. length(\$s);
} else {
push \@pos, 0..length(\$s);
}
for my \$pos ( \@pos ) {
my \%index;
my \@match;
my \%named;
\$bool = 1;
\$named{KEY} = \$_[3]{KEY} if exists \$_[3]{KEY};
\$m = Pugs::Runtime::Match->new( {
str => \\\$s, from => \\(0+\$pos), to => \\(\$pos),
bool => \\\$bool, match => \\\@match, named => \\\%named, capture => undef,
} );
{
my \$prior = \$::_V6_PRIOR_;
local \$::_V6_PRIOR_ = \$prior;
\$bool = 0 unless
" .
#" do { TAILCALL: ;\n" .
emit_rule( $ast, ' ' ) . ";
}
if ( \$bool ) {
my \$prior = \$::_V6_PRIOR_;
\$::_V6_PRIOR_ = sub {
local \$main::_V6_PRIOR_ = \$prior;
\$rule->(\@_);
};
#warn \"pos2 = \", \$pos, \"\\n\";
pos(\$_[1]) = \$pos if \$_[3]{continue};
last;
}
} # /for
\$::_V6_MATCH_ = \$m;
return \$m;
} }
## \n";
}
sub emit_rule {
my $n = $_[0];
my $tab = $_[1] . ' ';
die "unknown node: ", Dumper( $n )
unless ref( $n ) eq 'HASH';
#print "NODE ", Dumper($n);
my @keys = grep { substr($_, 0, 1) ne '_' } keys %$n;
### Node keys: @keys
my ($k) = @keys;
my $v = $n->{$k};
local $RegexPos = $n->{_pos};
### $RegexPos
if (!defined $RegexPos) {
# warn "WARNING: No _pos slot found for AST node '$k'.\n";
# warn Dumper($n);
$RegexPos = [];
}
# XXX - use real references
no strict 'refs';
#print "NODE ", Dumper($k), ", ", Dumper($v);
my $code = $k->( $v, $tab );
return $code;
}
#rule nodes
sub non_capturing_group {
return emit_rule( $_[0], $_[1] );
}
sub quant {
my $term = $_[0]->{'term'};
my $quantifier = $_[0]->{quant} || '';
my $greedy = $_[0]->{greedy} || ''; # + ?
die "greediness control not implemented: $greedy"
if $greedy;
#print "QUANT: ",Dumper($_[0]);
my $id = id();
my $tab = ( $quantifier eq '' ) ? $_[1] : $_[1] . " ";
my $ws = metasyntax( { metasyntax => 'ws', modifier => '.' }, $tab );
my $ws3 = ( $sigspace && $_[0]->{ws3} ne '' ) ? " &&\n$ws" : '';
my $rul;
{
#print "Term: ", Dumper($term), "\n";
my $cap = $capture_to_array;
local $capture_to_array = $cap || ( $quantifier ne '' );
$rul = emit_rule( $term, $tab );
# rollback on fail
$rul = "$_[1] ( "
. " ( \$pad{$id} = \$pos or 1 ) &&\n"
. $rul
. " ||"
. " ( ( \$pos = \$pad{$id} ) && 0 )"
. " )";
}
$rul = "$ws &&\n$rul" if $sigspace && $_[0]->{ws1} ne '';
$rul = "$rul &&\n$ws" if $sigspace && $_[0]->{ws2} ne '';
#print $rul;
return "
$_[1] ##
$_[1] ## pos: @$RegexPos
" . $rul . "
$_[1] ## \n"
if $quantifier eq '';
# * + ?
# TODO: *? +? ??
# TODO: *+ ++ ?+
# TODO: quantifier + capture creates Array
#warn Dumper( $quantifier );
if ( ref( $quantifier ) eq 'HASH' )
{
my $code = $quantifier->{closure};
if ( ref( $code ) ) {
if ( defined $Pugs::Compiler::Perl6::VERSION ) {
#print " perl6 compiler is loaded \n";
$code = Pugs::Emitter::Perl6::Perl5::emit( 'grammar', $code, 'self' );
}
};
my @count = eval $code;
#warn "code: $code = [ @count ]";
die "quantifier not implemented: " . Dumper( $quantifier )
if @count ne 1
|| $count[0] == 0;
return
"$_[1] ## \n" .
"$_[1] ## pos: @$RegexPos\n" .
"$_[1] (\n" .
join( ' && ', ($rul) x $count[0] ) .
"\n" .
"$_[1] )$ws3\n" .
"$_[1] ## \n";
}
return
"$_[1] ## \n" .
"$_[1] ## pos: @$RegexPos\n" .
"$_[1] (\n$rul\n" .
"$_[1] || ( \$bool = 1 )\n" .
"$_[1] )$ws3\n" .
"$_[1] ## \n"
if $quantifier eq '?';
return
"$_[1] ## \n" .
"$_[1] ## pos: @$RegexPos\n" .
"$_[1] do { while (\n$rul) {}; \$bool = 1 }$ws3\n" .
"$_[1] ## \n"
if $quantifier eq '*';
return
"$_[1] ## \n" .
"$_[1] ## pos: @$RegexPos\n" .
"$_[1] (\n$rul\n" .
"$_[1] && do { while (\n$rul) {}; \$bool = 1 }\n" .
"$_[1] )$ws3\n" .
"$_[1] ## \n"
if $quantifier eq '+';
die "quantifier not implemented: $quantifier";
}
sub alt {
my @s;
# print 'Alt: ';
my $count = $capture_count;
my $max = -1;
my $id = id();
for ( @{$_[0]} ) {
$capture_count = $count;
my $tmp = emit_rule( $_, $_[1].' ' );
# print ' ',$capture_count;
$max = $capture_count
if $capture_count > $max;
push @s, $tmp if $tmp;
}
$capture_count = $max;
# print " max = $capture_count\n";
return
"$_[1] ##
$_[1] ## pos: @$RegexPos
$_[1] (
$_[1] ( \$pad{$id} = \$pos or 1 )
$_[1] && (
" . join( "
$_[1] )
$_[1] || (
$_[1] ( ( \$bool = 1 ) && ( \$pos = \$pad{$id} ) or 1 )
$_[1] && ",
@s
) . "
$_[1] )
$_[1] )
$_[1] ## \n";
}
sub alt1 { &alt }
sub conjunctive {
my @s;
# print 'conjunctive: ';
my $count = $capture_count;
my $max = -1;
my $id = id();
for ( @{$_[0]} ) {
$capture_count = $count;
my $tmp = emit_rule( $_, $_[1].' ' );
# print ' ',$capture_count;
$max = $capture_count
if $capture_count > $max;
push @s, $tmp if $tmp;
}
$capture_count = $max;
# print " max = $capture_count\n";
return
"$_[1] ##
$_[1] ## pos: @$RegexPos
$_[1] (
$_[1] ( \$pad{$id} = \$pos or 1 )
$_[1] && (
" . join( "
$_[1] )
$_[1] && (
$_[1] ( ( \$bool = 1 ) && ( \$pos = \$pad{$id} ) or 1 )
$_[1] && ",
@s
) . "
$_[1] )
$_[1] )
$_[1] ## \n";
}
sub conjunctive1 { &conjunctive }
sub concat {
my @s;
=for optimizing
# optimize for the common case of "words"
# Note: this optimization has almost no practical effect
my $is_constant = 0;
for ( @{$_[0]} ) {
if ( ! $sigspace && exists $_->{quant} ) {
my $was_constant = $is_constant;
$is_constant =
$_->{quant}->{quant} eq ''
&& exists $_->{quant}->{term}->{constant};
#print "concat: ", Dumper( $_ );
if ( $is_constant && $was_constant && $direction ne '-' ) {
$s[-1]->{quant}->{term}->{constant} .=
$_->{quant}->{term}->{constant};
#print "constant: ",$s[-1]->{quant}->{term}->{constant},"\n";
next;
}
}
push @s, $_;
}
for ( @s ) {
$_ = emit_rule( $_, $_[1] );
}
=cut
# Try to remove non-greedy quantifiers, by inserting a lookahead;
# cheat: / .*? b /
# into: / [ . ]* b /
# TODO - make it work for '+' quantifier too
for my $i ( 0 .. @{$_[0]} - 1 ) {
if ( exists $_[0][$i]{quant}
&& $_[0][$i]{quant}{quant} eq '*'
&& $_[0][$i]{quant}{greedy} eq '?'
) {
my $tmp = { quant => {
%{ $_[0][$i]{quant} },
greedy => '', quant => ''
},
_pos => $_[0][$i]{_pos}
};
$_[0][$i] = {
_pos => $_[0][$i]{_pos},
quant => {
greedy => '',
quant => $_[0][$i]{quant}{quant},
ws1 => '',
ws2 => '',
ws3 => '',
term => {
_pos => $_[0][$i]{_pos},
concat => [
{
_pos => $_[0][$i]{_pos},
before => {
rule => {
_pos => $_[0][$i]{_pos},
concat => [
@{ $_[0] }[$i+1 .. $#{ $_[0] } ]
],
},
modifier => '!',
}
},
$tmp,
],
},
},
};
#warn "Quant: ",Dumper($_[0]);
}
}
for ( @{$_[0]} ) {
my $tmp = emit_rule( $_, $_[1] );
push @s, $tmp if $tmp;
}
@s = reverse @s if $direction eq '-';
return
"$_[1] ##
$_[1] ## pos: @$RegexPos
$_[1] (\n" . join( "\n$_[1] &&\n", @s ) . "
$_[1] )
$_[1] ## \n";
}
sub code {
return "$_[1] $_[0]\n";
}
sub dot {
"
$_[1] ##
$_[1] ## pos: @$RegexPos
$_[1] ( substr( \$s, \$pos$direction$direction, 1 ) ne '' )
$_[1] ## \n"
}
sub variable {
my $name = "$_[0]";
my $value = undef;
# XXX - eval $name doesn't look up in user lexical pad
# XXX - what &xxx interpolate to?
#print "VAR: $name \n";
# expand embedded $scalar
if ( $name =~ /^\$/ ) {
# $^a, $^b
if ( $name =~ /^ \$ \^ ([^\s]*) /x ) {
my $index = ord($1)-ord('a');
#print "Variable #$index\n";
#return "$_[1] constant( \$_[7][$index] )\n";
my $code =
" ... sub {
#print \"Runtime Variable args[\", join(\",\",\@_) ,\"] \$_[7][$index]\\n\";
return constant( \$_[7][$index] )->(\@_);
}";
$code =~ s/^/$_[1]/mg;
return "$code\n";
}
$value = eval $name;
}
# expand embedded @arrays
if ( $name =~ /^\@/ ) {
my $code = q!
join(
'|',
! . $name . q!
)
!;
return
"$_[1] ##
$_[1] ## pos: @$RegexPos
$_[1] ( eval( '( substr( \$s, \$pos ) =~ m/^(' . $code . ')/ )
$_[1] ? ( \$pos $direction= length( \$1 ) or 1 )
$_[1] : 0
$_[1] ') )
$_[1] ## \n";
}
# expand embedded %hash
if ( $name =~ /^%/ ) {
my $id = '$' . id();
my $preprocess_hash = 'Pugs::Runtime::Regex::preprocess_hash';
my $code =
"
##
## pos: @$RegexPos
do {
our $id;
our ${id}_sizes;
unless ( $id ) {
my \$hash = \\$name;
my \%sizes = map { length(\$_) => 1 } keys \%\$hash;
${id}_sizes = [ sort { \$b <=> \$a } keys \%sizes ];
" . #print \"sizes: \@${id}_sizes\\n\";
"$id = \$hash;
}
" . #print 'keys: ',Dumper( $id );
"my \$match = 0;
my \$key;
for ( \@". $id ."_sizes ) {
\$key = ( \$pos <= length( \$s )
? substr( \$s, \$pos, \$_ )
: '' );
" . #print \"try ".$name." \$_ = \$key; \$s\\\n\";
"if ( exists ". $id ."->{\$key} ) {
#\$named{KEY} = \$key;
#\$::_V6_MATCH_ = \$m;
#print \"m: \", Dumper( \$::_V6_MATCH_->data )
# if ( \$key eq 'until' );
" . #print \"* ".$name."\{'\$key\'} at \$pos \\\n\";
"\$match = $preprocess_hash( $id, \$key )->( \$s, \$grammar, { p => ( \$pos + \$_ ), positionals => [ ], args => { KEY => \$key } }, undef );
" . #print \"match: \", Dumper( \$match->data );
"last if \$match;
}
}
if ( \$match ) {
\$pos = \$match->to;
#print \"match: \$key at \$pos = \", Dumper( \$match->data );
\$bool = 1;
}; # else { \$bool = 0 }
\$match;
}
##
";
#print $code;
return $code;
}
die "interpolation of $name not implemented"
unless defined $value;
return call_constant( $value, $_[1] );
}
sub special_char {
my ($char, $data) = $_[0] =~ /^.(.)(.*)/;
return call_perl5( '\\N{$data}', $_[1] )
if $char eq 'c';
return call_perl5( '(?!\\N{$data}).', $_[1] )
if $char eq 'C';
return call_perl5( '\\x{'.$data.'}', $_[1] )
if $char eq 'x';
return call_perl5( '(?!\\x{'.$data.'}).', $_[1] )
if $char eq 'X';
return special_char( sprintf("\\x%X", oct($data) ) )
if $char eq 'o';
return special_char( sprintf("\\X%X", oct($data) ) )
if $char eq 'O';
return call_perl5( '(?:\n\r?|\r\n?)', $_[1] )
if $char eq 'n';
return call_perl5( '(?!\n\r?|\r\n?).', $_[1] )
if $char eq 'N';
# XXX - Infinite loop in pugs stdrules.t
#return metasyntax( '?_horizontal_ws', $_[1] )
return call_perl5( '[\x20\x09]' )
if $char eq 'h';
return call_perl5( '[^\x20\x09]' )
if $char eq 'H';
#return metasyntax( '?_vertical_ws', $_[1] )
return call_perl5( '[\x0A\x0D]' )
if $char eq 'v';
return call_perl5( '[^\x0A\x0D]' )
if $char eq 'V';
for ( qw( r n t e f w d s ) ) {
return call_perl5( "\\$_", $_[1] ) if $char eq $_;
return call_perl5( "[^\\$_]", $_[1] ) if $char eq uc($_);
}
$char = '\\\\' if $char eq '\\';
### special char: $char
return call_constant( $char, $_[1] );
}
sub match_variable {
my $name = $_[0];
my $num = substr($name,1);
#print "var name: ", $num, "\n";
return
"
$_[1] ##
$_[1] ## pos: @$RegexPos
$_[1] ( eval( '( substr( \$s, \$pos ) =~ m/^(' . \$m->{$num} . ')/ )
$_[1] ? ( \$pos $direction= length( \$1 ) or 1 )
$_[1] : 0
$_[1] ') )
$_[1] ##
";
}
sub closure {
#print "closure: ",Dumper($_[0]);
my $code = $_[0]{closure};
my $modifier = $_[0]{modifier}; # 'plain', '', '?', '!'
die "invalid closure modifier: . "
if $modifier eq '.';
#die "closure modifier not implemented '$modifier'"
# unless $modifier eq 'plain';
if ( ref( $code )
&& defined $Pugs::Compiler::Perl6::VERSION
) {
#print " perl6 compiler is loaded \n";
$code = Pugs::Emitter::Perl6::Perl5::emit( 'grammar', $code, 'self' );
$code = '{ my $_V6_SELF = shift; ' . $code . '}'; # make it a "method"
}
else {
#print " perl6 compiler is NOT loaded \n";
# XXX XXX XXX - source-filter - temporary hacks to translate p6 to p5
# $()
$code =~ s/ ([^']) \$ \$ (\d+) /$1\${ \$_[0]->[$2] }/sgx;
$code =~ s/ ([^']) \$ (\d+) /$1\$_[0]->[$2]/sgx;
$code =~ s/ ([^']) \$ \( \) < (.*?) > /$1\$_[0]->{$2}/sgx;
# $
$code =~ s/ ([^']) \$ \$ < (.*?) > /$1\${ \$_[0]->{qw($2)} }/sgx;
$code =~ s/ ([^']) \$ < (.*?) > /$1\$_[0]->{qw($2)}/sgx;
# $()
$code =~ s/ ([^']) \$ \( \) /$1\$_[0]->()/sgx;
# $/
$code =~ s/ ([^']) \$ \/ ([\{\[]) /$1\$_[0]->$2/sgx;
$code =~ s/ ([^']) \$ \/ /$1\$_[0]/sgx;
#$code =~ s/ use \s+ v6 \s* ; / # use v6\n/sgx;
}
#print "Code: $code\n";
# "plain" {...return ...}
return
"$_[1] ## \n"
. "$_[1] ## pos: @$RegexPos\n"
. "$_[1] do {\n"
. "$_[1] local \$::_V6_SUCCEED = 1;\n"
. "$_[1] \$::_V6_MATCH_ = \$m;\n"
. "$_[1] \$m->data->{capture} = \\( sub $code->( \$m ) ); \n"
. "$_[1] \$bool = \$::_V6_SUCCEED;\n"
. "$_[1] \$::_V6_MATCH_ = \$m if \$bool; \n"
. "$_[1] return \$m if \$bool; \n"
. "$_[1] }\n"
. "$_[1] ## \n"
if $code =~ /return/;
# "plain" {...} without return
return
"$_[1] ## \n"
. "$_[1] ## pos: @$RegexPos\n"
. "$_[1] do { \n"
. "$_[1] local \$::_V6_SUCCEED = 1;\n"
. "$_[1] \$::_V6_MATCH_ = \$m;\n"
. "$_[1] sub $code->( \$m );\n"
. "$_[1] 1;\n"
. "$_[1] }\n"
. "$_[1] ## \n"
if $modifier eq 'plain';
# "?" {...}>
return
"$_[1] ## \n" .
"$_[1] ## pos: @$RegexPos\n" .
"$_[1] do { \n" .
"$_[1] local \$::_V6_SUCCEED = 1;\n" .
"$_[1] \$::_V6_MATCH_ = \$m;\n" .
"$_[1] \$bool = ( sub $code->( \$m ) ) ? 1 : 0; \n" .
"$_[1] }" .
"$_[1] ## \n"
if $modifier eq '?';
# "!"
return
"$_[1] ## \n" .
"$_[1] ## pos: @$RegexPos\n" .
"$_[1] do { \n" .
"$_[1] local \$::_V6_SUCCEED = 1;\n" .
"$_[1] \$::_V6_MATCH_ = \$m;\n" .
"$_[1] \$bool = ( sub $code->( \$m ) ) ? 0 : 1; \n" .
"$_[1] }" .
"$_[1] ## \n"
if $modifier eq '!';
}
sub capturing_group {
my $program = $_[0];
$capture_count++;
{
local $capture_count = -1;
local $capture_to_array = 0;
$program = emit_rule( $program, $_[1].' ' )
if ref( $program );
}
return "
$_[1] ##
$_[1] do{
$_[1] my \$hash = do {
$_[1] my \$bool = 1;
$_[1] my \$from = \$pos;
$_[1] my \@match;
$_[1] my \%named;
$_[1] \$bool = 0 unless
" . $program . ";
$_[1] { str => \\\$s, from => \\\$from, match => \\\@match, named => \\\%named, bool => \\\$bool, to => \\(0+\$pos), capture => undef }
$_[1] };
$_[1] my \$bool = \${\$hash->{'bool'}};" .
( $capture_to_array
? "
$_[1] if ( \$bool ) {
$_[1] push \@{ \$match[ $capture_count ] }, Pugs::Runtime::Match->new( \$hash );
$_[1] }"
: "
$_[1] \$match[ $capture_count ] = Pugs::Runtime::Match->new( \$hash );"
) . "
$_[1] \$bool;
$_[1] }
$_[1] ## \n";
}
sub capture_as_result {
my $program = $_[0];
$capture_count++;
{
local $capture_count = -1;
local $capture_to_array = 0;
$program = emit_rule( $program, $_[1].' ' )
if ref( $program );
}
return "$_[1] ##
$_[1] ## pos: @$RegexPos
$_[1] do{
$_[1] my \$hash = do {
$_[1] my \$bool = 1;
$_[1] my \$from = \$pos;
$_[1] my \@match;
$_[1] my \%named;
$_[1] \$bool = 0 unless
" . $program . ";
$_[1] { str => \\\$s, from => \\\$from, match => \\\@match, named => \\\%named, bool => \\\$bool, to => \\(0+\$pos), capture => undef }
$_[1] };
$_[1] my \$bool = \${\$hash->{'bool'}};
$_[1] \$m->data->{capture} = \\( \"\" . Pugs::Runtime::Match->new( \$hash ) );
$_[1] \$bool;
$_[1] }
$_[1] ## \n";
}
sub named_capture {
my $name = $_[0]{ident};
### $name
if (ref($name) eq 'HASH') {
$name = $name->{match_variable} || $name->{variable};
}
$name =~ s/^[\$\@\%]//; # TODO - change semantics as needed
my $program = $_[0]{rule};
#warn "name [$name]\n";
if ( exists $program->{metasyntax} ) {
#print "aliased subrule\n";
# $/ = $/
my $cmd = $program->{metasyntax}{metasyntax};
die "invalid aliased subrule"
unless $cmd =~ /^[_[:alnum:]]/;
#
my ( $subrule, $param_list ) = split( /[\(\)]/, $cmd );
$param_list = '' unless defined $param_list;
my @param = split( ',', $param_list );
return "$_[1] ##
$_[1] ## pos: @$RegexPos
$_[1] do {
my \$prior = \$::_V6_PRIOR_;
my \$match =\n" .
call_subrule( $subrule, $_[1]." ", "", @param ) . ";
\$::_V6_PRIOR_ = \$prior;
if ( \$match ) {" .
( $capture_to_array
? " push \@{\$named{'$name'}}, \$match;"
: " \$named{'$name'} = \$match;"
) . "
\$pos = \$match->to;
1
}
else { 0 }
}
$_[1] ## \n";
}
elsif ( exists $program->{capturing_group} ) {
#print "aliased capturing_group\n";
# $/ = $/[0]
{
local $capture_count = -1;
local $capture_to_array = 0;
$program = emit_rule( $program, $_[1].' ' )
if ref( $program );
}
return "$_[1] ##
$_[1] ## pos: @$RegexPos
$_[1] do{
my \$match = Pugs::Runtime::Match->new( do {
my \$bool = 1;
my \$from = \$pos;
my \@match;
my \%named;
\$bool = 0 unless " .
$program . ";
{ str => \\\$s, from => \\\$from, match => \\\@match, named => \\\%named, bool => \\\$bool, to => \\(0+\$pos), capture => undef }
} );
if ( \$match ) {" .
( $capture_to_array
? " push \@{\$named{'$name'}}, \$match;"
: " \$named{'$name'} = \$match;"
) . "
\$pos = \$match->to;
1
}
else { 0 }
}
$_[1] ## \n";
}
else {
#print "aliased non_capturing_group\n";
# $/ = "$/"
#print Dumper( $_[0] );
$program = emit_rule( $program, $_[1].' ' );
return "$_[1] ##
$_[1] ## pos: @$RegexPos
$_[1] do{
my \$from = \$pos;
my \$bool = $program;
my \$match = Pugs::Runtime::Match->new(
{ str => \\\$s, from => \\\$from, match => [], named => {}, bool => \\1, to => \\(0+\$pos), capture => undef }
);" .
( $capture_to_array
? " push \@{\$named{'$name'}}, \$match;"
: " \$named{'$name'} = \$match;"
) . "
\$bool
}
$_[1] ## \n";
}
}
sub negate {
my $program = $_[0];
#print "Negate: ", Dumper($_[0]);
$program = emit_rule( $program, $_[1].' ' )
if ref( $program );
return "$_[1] ##
$_[1] ## pos: @$RegexPos
$_[1] do{
$_[1] my \$pos1 = \$pos;
$_[1] do {
$_[1] my \$pos = \$pos1;
$_[1] my \$from = \$pos;
$_[1] my \@match;
$_[1] my \%named;
$_[1] \$bool = " . $program . " ? 0 : 1;
$_[1] \$bool;
$_[1] };
$_[1] }
$_[1] ## \n";
}
sub before {
my $mod = delete $_[0]{modifier} || '';
#### before atom: $_[0]
return negate( { before => $_[0], _pos => $_[0]{rule}{_pos}, }, $_[1] ) if $mod eq '!';
my $program = $_[0]{rule};
$program = emit_rule( $program, $_[1].' ' )
if ref( $program );
return "
$_[1] ##
$_[1] ## pos: @$RegexPos
$_[1] do{
$_[1] my \$pos1 = \$pos;
$_[1] do {
$_[1] my \$pos = \$pos1;
$_[1] my \$from = \$pos;
$_[1] my \@match;
$_[1] my \%named;
$_[1] \$bool = 0 unless
" . $program . ";
$_[1] \$bool;
$_[1] };
$_[1] }
$_[1] ## \n";
}
sub after {
my $mod = delete $_[0]{modifier};
return negate( { after => $_[0] }, $_[1] ) if $mod eq '!';
local $direction = "-";
my $program = $_[0]{rule};
$program = emit_rule( $program, $_[1].' ' )
if ref( $program );
return "$_[1] ##
$_[1] ## pos: @$RegexPos
$_[1] do{
$_[1] my \$pos1 = \$pos;
$_[1] do {
$_[1] my \$pos = \$pos1 - 1;
$_[1] my \$from = \$pos;
$_[1] my \@match;
$_[1] my \%named;
$_[1] \$bool = 0 unless
" . $program . ";
$_[1] \$bool;
$_[1] };
$_[1] }
$_[1] ## \n";
}
sub colon {
my $str = $_[0];
return "$_[1] 1 # : no-op\n"
if $str eq ':';
return "$_[1] ( \$pos >= length( \$s ) )\n"
if $str eq '$';
return "$_[1] ( \$pos == 0 )\n"
if $str eq '^';
return "$_[1] ( \$pos >= length( \$s ) || substr( \$s, \$pos ) =~ ".'/^(?:\n\r?|\r\n?)/m'." )\n"
if $str eq '$$';
return "$_[1] ( \$pos == 0 || substr( \$s, 0, \$pos ) =~ ".'/(?:\n\r?|\r\n?)$/m'." )\n"
if $str eq '^^';
return metasyntax( { metasyntax => '_wb_left', modifier => '?' }, $_[1] )
if $str eq '<<';
return metasyntax( { metasyntax => '_wb_right', modifier => '?' }, $_[1] )
if $str eq '>>';
die "'$str' not implemented";
}
sub modifier {
my $str = $_[0];
die "modifier '$str' not implemented";
}
sub constant {
call_constant( @_ );
}
sub char_class {
my $cmd = Pugs::Emitter::Rule::Perl5::CharClass::emit( $_[0] );
return call_perl5($cmd, $_[1]);
}
sub call {
#die "not implemented: ", Dumper(\@_);
my $param = $_[0]{params};
my $name = $_[0]{method};
# capturing subrule
#
my ($param_list) = $param =~ /\{(.*)\}/;
$param_list = '' unless defined $param_list;
my @param = split( ',', $param_list );
#print "param: ", Dumper(\@param);
# TODO
if ( $name eq 'at' ) {
$param_list ||= 0; # XXX compile-time only
return "$_[1] ( \$pos == $param_list )\n"
}
return named_capture(
{
ident => $name,
rule => { metasyntax => { metasyntax => $name }, _pos => $_[0]{_pos}, },
},
$_[1],
);
}
sub metasyntax {
#
#print Dumper(\@_);
my $cmd = $_[0]{metasyntax};
my $modifier = delete $_[0]{modifier} || ''; # . ? !
return negate( { metasyntax => $_[0], _pos => $_[0]{_pos} }, $_[1] ) if $modifier eq '!';
my $prefix = substr( $cmd, 0, 1 );
if ( $prefix eq '@' ) {
# XXX - wrap @array items - see end of Pugs::Grammar::Rule
# TODO - param list
my $name = substr( $cmd, 1 );
return
"$_[1] ##
$_[1] ## pos: @$RegexPos
$_[1] do {
my \$match;
for my \$subrule ( $cmd ) {
\$match = \$subrule->match( \$s, \$grammar, { p => ( \$pos ), positionals => [ ], args => {} }, undef );
last if \$match;
}
if ( \$match ) {" .
( $capture_to_array
? " push \@{\$named{'$name'}}, \$match;"
: " \$named{'$name'} = \$match;"
) . "
\$pos = \$match->to;
1
}
else { 0 }
}
$_[1] ## \n";
}
if ( $prefix eq '%' ) {
# XXX - runtime or compile-time interpolation?
my $name = substr( $cmd, 1 );
# print "<$cmd>\n";
# return variable( $cmd );
return "$_[1]##
$_[1] ## pos: @$RegexPos
$_[1] do{
my \$match = " . variable( $cmd, $_[1] ) . ";
if ( \$match ) {" .
( $capture_to_array
? " push \@{\$named{'$name'}}, \$match;"
: " \$named{'$name'} = \$match;"
) . "
\$pos = \$match->to;
1
}
else { 0 }
}\n$_[1]## \n";
}
if ( $prefix eq '$' ) {
if ( $cmd =~ /::/ ) {
# call method in fully qualified $package::var
# ...->match( $rule, $str, $grammar, $flags, $state )
# TODO - send $pos to subrule
return
"$_[1] ## \n" .
"$_[1] ## pos: @$RegexPos\n" .
"$_[1] do {\n" .
"$_[1] push \@match,\n" .
"$_[1] $cmd->match( \$s, \$grammar, {p => \$pos}, undef );\n" .
"$_[1] \$pos = \$match[-1]->to;\n" .
"$_[1] !\$match[-1] != 1;\n" .
"$_[1] }\n" .
"$_[1] ## \n";
}
# call method in lexical $var
# TODO - send $pos to subrule
return
"$_[1] ## \n" .
"$_[1] ## pos: @$RegexPos\n" .
"$_[1] do {\n" .
"$_[1] my \$r = Pugs::Runtime::Regex::get_variable( '$cmd' );\n" .
"$_[1] push \@match,\n" .
"$_[1] \$r->match( \$s, \$grammar, {p => \$pos}, undef );\n" .
"$_[1] \$pos = \$match[-1]->to;\n" .
"$_[1] !\$match[-1] != 1;\n" .
"$_[1] }\n" .
"$_[1] ## \n";
}
if ( $prefix eq q(') ) { # single quoted literal '
$cmd = substr( $cmd, 1, -1 );
return call_constant( $cmd, $_[1] );
}
if ( $prefix eq q(") ) { # interpolated literal "
$cmd = substr( $cmd, 1, -1 );
warn "<\"...\"> not implemented";
return;
}
if (
$modifier eq '.'
|| $modifier eq '?' # XXX FIXME
)
{ # non_capturing_subrule / code assertion
#$cmd = substr( $cmd, 1 );
if ( $cmd =~ /^{/ ) {
warn "code assertion not implemented";
return;
}
my @param; # TODO
my $subrule = $cmd;
return
"$_[1] ##
$_[1] ## pos: @$RegexPos
$_[1] do {
$_[1] my \$prior = \$::_V6_PRIOR_;
$_[1] my \$match =\n" .
call_subrule( $subrule, $_[1]." ", "", @param ) . ";
$_[1] \$::_V6_PRIOR_ = \$prior;
$_[1] my \$bool = (!\$match != 1);
$_[1] \$pos = \$match->to if \$bool;
$_[1] \$match;
$_[1] }
$_[1] ## \n";
}
if ( $prefix =~ /[_[:alnum:]]/ ) {
if ( $cmd eq 'cut' ) {
warn "<$cmd> not implemented";
return;
}
if ( $cmd eq 'commit' ) {
warn "<$cmd> not implemented";
return;
}
if ( $cmd eq 'null' ) {
return "$_[1] 1 # null\n"
}
#
my ( $subrule, $param_list ) = split( /[\(\)]/, $cmd );
$param_list ||= '';
if ( $subrule eq 'at' ) {
$param_list ||= 0; # XXX compile-time only
return "$_[1] ( \$pos == $param_list )\n"
}
return named_capture(
{
ident => $subrule,
rule => { metasyntax => { metasyntax => $cmd }, _pos => $_[0]->{_pos} },
},
$_[1],
);
}
#### $prefix
#### $modifier
#if ( $prefix eq '.' ) {
# my ( $method, $param_list ) = split( /[\(\)]/, $cmd );
# $method =~ s/^\.//;
# $param_list ||= '';
# return " ( \$s->$method( $param_list ) ? 1 : 0 ) ";
#}
die "<$cmd> not implemented";
}
1;