Skip to content

join: be less magical and handle magic with less undefined behaviour #21502

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Oct 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 30 additions & 6 deletions doop.c
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,13 @@ Perl_do_trans(pTHX_ SV *sv)
}
}

#ifdef DEBUGGING
/* make it small to exercise the logic */
# define JOIN_DELIM_BUFSIZE 2
#else
# define JOIN_DELIM_BUFSIZE 40
#endif

/*
=for apidoc_section $string
=for apidoc do_join
Expand All @@ -658,13 +665,30 @@ Magic and tainting are handled.
void
Perl_do_join(pTHX_ SV *sv, SV *delim, SV **mark, SV **sp)
{
PERL_ARGS_ASSERT_DO_JOIN;

SV ** const oldmark = mark;
I32 items = sp - mark;
SSize_t items = sp - mark;
STRLEN len;
STRLEN delimlen;
const char * const delims = SvPV_const(delim, delimlen);

PERL_ARGS_ASSERT_DO_JOIN;
const char * delimpv = SvPV_const(delim, delimlen);
char delim_buf[JOIN_DELIM_BUFSIZE];
bool delim_do_utf8 = DO_UTF8(delim);

if (items >= 2) {
/* Make a copy of the delim, since G or A magic may modify the delim SV.
Use a local buffer if possible to avoid the cost of allocation and
clean up.
*/
if (delimlen <= JOIN_DELIM_BUFSIZE) {
Copy(delimpv, delim_buf, delimlen, char);
delimpv = delim_buf;
}
else {
delimpv = savepvn(delimpv, delimlen);
SAVEFREEPV(delimpv);
}
}

mark++;
len = (items > 0 ? (delimlen * (items - 1) ) : 0);
Expand Down Expand Up @@ -699,11 +723,11 @@ Perl_do_join(pTHX_ SV *sv, SV *delim, SV **mark, SV **sp)
}

if (delimlen) {
const U32 delimflag = DO_UTF8(delim) ? SV_CATUTF8 : SV_CATBYTES;
const U32 delimflag = delim_do_utf8 ? SV_CATUTF8 : SV_CATBYTES;
for (; items > 0; items--,mark++) {
STRLEN len;
const char *s;
sv_catpvn_flags(sv,delims,delimlen,delimflag);
sv_catpvn_flags(sv, delimpv, delimlen, delimflag);
s = SvPV_const(*mark,len);
sv_catpvn_flags(sv,s,len,
DO_UTF8(*mark) ? SV_CATUTF8 : SV_CATBYTES);
Expand Down
89 changes: 88 additions & 1 deletion t/op/join.t
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ BEGIN {
set_up_inc('../lib');
}

plan tests => 29;
plan tests => 43;

@x = (1, 2, 3);
is( join(':',@x), '1:2:3', 'join an array with character');
Expand Down Expand Up @@ -128,3 +128,90 @@ package o { use overload q|""| => sub { ${$_[0]}++ } }
for(1,2) { push @_, \join "x", 1 }
isnt $_[1], $_[0],
'join(const, const) still returns a new scalar each time';

# tests from GH #21458
# simple tied variable
{
package S;
our $fetched;
sub TIESCALAR { my $x = '-'; $fetched = 0; bless \$x }
sub FETCH { my $y = shift; $fetched++; $$y }

package main;
my $t;

tie $t, 'S';
is( join( $t, a .. c ), 'a-b-c', 'tied separator' );
is( $S::fetched, 1, 'FETCH called once' );

tie $t, 'S';
is( join( $t, 'a' ), 'a', 'tied separator on single item join' );
is( $S::fetched, 0, 'FETCH not called' );

tie $t, 'S';
is( join( $t, 'a', $t, 'b', $t, 'c' ),
'a---b---c', 'tied separator also in the join arguments' );
is( $S::fetched, 3, 'FETCH called 1 + 2 times' );
}
# self-modifying tied variable
{

package SM;
our $fetched;
sub TIESCALAR { my $x = "1"; $fetched = 0; bless \$x }
sub FETCH { my $y = shift; $fetched++; $$y += 3 }

package main;
my $t;

tie $t, "SM";
is( join( $t, a .. c ), 'a4b4c', 'tied separator' );
is( $SM::fetched, 1, 'FETCH called once' );

tie $t, "SM";
is( join( $t, 'a' ), 'a', 'tied separator on single item join' );
is( $SM::fetched, 0, 'FETCH not called' );

tie $t, "SM";
is( join( $t, "a", $t, "b", $t, "c" ),
'a474b4104c', 'tied separator also in the join arguments' );
is( $SM::fetched, 3, 'FETCH called 1 + 2 times' );
}
{
# see GH #21484
my $expect = "a\x{100}\x{100}x\x{100}\x{100}b\n";
utf8::encode($expect);
fresh_perl_is(<<'CODE', $expect, {}, "modifications delim from magic should be ignored");
# The x $n here is to ensure the PV of $sep isn't a COW of some other SV
# so the PV of $sep is unlikely to change when the overload assigns to $sep.
my $n = 2;
my $sep = "\x{100}" x $n;
package MyOver {
use overload '""' => sub { $sep = "\xFF" x $n; "x" };
}

my $x = bless {}, "MyOver";
binmode STDOUT, ":utf8";
print join($sep, "a", $x, "b"), "\n";
CODE
}
{
# see GH #21484
my $expect = "x\x{100}\x{100}a\n";
utf8::encode($expect); # fresh_perl() does bytes
fresh_perl_is(<<'CODE', $expect, {}, "modifications to delim PVX shouldn't crash");
# the x $n here is to ensure $sep has it's own PV rather than sharing it
# in a COW sense, This means that when the expanded version ($n+20) is assigned
# the origin PV has been released and valgrind or ASAN can pick up the use
# of the freed buffer.
my $n = 2;
my $sep = "\x{100}" x $n;
package MyOver {
use overload '""' => sub { $sep = "\xFF" x ($n+20); "x" };
}

my $x = bless {}, "MyOver";
binmode STDOUT, ":utf8";
print join($sep, $x, "a"), "\n";
CODE
}