Skip to content

Commit b7f18fb

Browse files
committed
beam_ssa_opt: Fix block tracking in bsm_shortcut
Success blocks were not adequately tracked, causing `bs_test_tail` instructions to be removed when they shouldn't have been. Fixes GH-10047
1 parent c388a2d commit b7f18fb

File tree

3 files changed

+174
-83
lines changed

3 files changed

+174
-83
lines changed

lib/compiler/src/beam_ssa_opt.erl

Lines changed: 97 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -2002,8 +2002,7 @@ bsm_skip([], _) -> [].
20022002

20032003
bsm_skip_is([I0|Is], Extracted) ->
20042004
case I0 of
2005-
#b_set{anno=Anno0,
2006-
op=bs_match,
2005+
#b_set{op=bs_match,
20072006
dst=Ctx,
20082007
args=[#b_literal{val=T}=Type,PrevCtx|Args0]}
20092008
when T =/= float, T =/= string, T =/= skip ->
@@ -2014,9 +2013,7 @@ bsm_skip_is([I0|Is], Extracted) ->
20142013
I0;
20152014
false ->
20162015
%% The value is never extracted.
2017-
Args = [#b_literal{val=skip},PrevCtx,Type|Args0],
2018-
Anno = maps:remove(arg_types, Anno0),
2019-
I0#b_set{anno=Anno,args=Args}
2016+
I0#b_set{args=[#b_literal{val=skip},PrevCtx,Type|Args0]}
20202017
end,
20212018
[I|Is];
20222019
#b_set{} ->
@@ -2117,7 +2114,7 @@ ssa_opt_bsm_shortcut({#opt_st{ssa=Linear0}=St, FuncDb}) ->
21172114
{St, FuncDb};
21182115
_ ->
21192116
Linear1 = bsm_shortcut(Linear0, Positions),
2120-
Linear = bsm_tail(Linear1, #{}),
2117+
Linear = bsm_tail(Linear1, #{ 0 => any }),
21212118
ssa_opt_live({St#opt_st{ssa=Linear}, FuncDb})
21222119
end.
21232120

@@ -2200,86 +2197,106 @@ bsm_shortcut([], _PosMap) -> [].
22002197
%% m1(<<_, Rest/binary>>) -> m1(Rest);
22012198
%% m1(<<>>) -> ok.
22022199
%%
2203-
%% The second clause of `m1/1` does not need to check for an empty
2204-
%% binary.
2205-
2206-
bsm_tail([{L,#b_blk{is=Is0,last=Last0}=Blk0}|Bs], Map0) ->
2207-
{Is,Last,Map} = bsm_tail_is(Is0, Last0, L, Map0, []),
2208-
Blk = Blk0#b_blk{is=Is,last=Last},
2209-
[{L,Blk}|bsm_tail(Bs, Map)];
2210-
bsm_tail([], _Map) ->
2200+
%% The second clause of `m1/1` does not need to check for an empty bitstring.
2201+
%%
2202+
%% This is done by keeping track of which blocks are reachable solely because
2203+
%% of `bs_match` instructions that can only fail because the end has been
2204+
%% reached, and then eliminating the related `bs_match` and `bs_test_tail`
2205+
%% instructions in those blocks.
2206+
2207+
bsm_tail([{L, #b_blk{is=Is0}=Blk0} | Bs], Tags0) when is_map_key(L, Tags0) ->
2208+
{Blk, Tags} = bsm_tail_is_1(Is0, Blk0, L, Tags0),
2209+
[{L, Blk} | bsm_tail(Bs, Tags)];
2210+
bsm_tail([_ | Bs], Tags) ->
2211+
bsm_tail(Bs, Tags);
2212+
bsm_tail([], _Tags) ->
22112213
[].
22122214

2213-
bsm_tail_is([#b_set{op=bs_start_match,anno=Anno,dst=Dst}=I|Is], Last, L, Map0, Acc) ->
2214-
case Anno of
2215-
#{arg_types := #{1 := Type}} ->
2216-
case beam_types:get_bs_matchable_unit(Type) of
2217-
error ->
2218-
bsm_tail_is(Is, Last, L, Map0, [I|Acc]);
2219-
Unit when is_integer(Unit) ->
2220-
Map = Map0#{Dst => Unit},
2221-
bsm_tail_is(Is, Last, L, Map, [I|Acc])
2222-
end;
2223-
#{} ->
2224-
bsm_tail_is(Is, Last, L, Map0, [I|Acc])
2225-
end;
2226-
bsm_tail_is([#b_set{op=bs_match,dst=Dst,args=Args},
2227-
#b_set{op={succeeded,guard},dst=SuccDst,args=[Dst]}|_]=Is,
2228-
#b_br{bool=SuccDst,fail=Fail}=Last,
2229-
_L, Map0, Acc) ->
2230-
case bsm_tail_num_matched(Args, Map0) of
2231-
unknown ->
2232-
%% Unknown number of bits or the match operation will fail
2233-
%% to match certain values.
2234-
Map = Map0#{Fail => unknown},
2235-
{reverse(Acc, Is),Last,Map};
2236-
Bits when is_integer(Bits) ->
2237-
case Map0 of
2238-
#{Fail := Bits} ->
2239-
{reverse(Acc, Is),Last,Map0};
2240-
#{Fail := _} ->
2241-
Map = Map0#{Fail => unknown},
2242-
{reverse(Acc, Is),Last,Map};
2243-
#{} ->
2244-
Map = Map0#{Fail => Bits},
2245-
{reverse(Acc, Is),Last,Map}
2246-
end
2247-
end;
2248-
bsm_tail_is([#b_set{op=bs_test_tail,args=[_,#b_literal{val=0}],dst=Dst}]=Is,
2249-
#b_br{bool=Dst,succ=Succ}=Last0, L, Map0, Acc) ->
2250-
case Map0 of
2251-
#{L := Bits} when is_integer(Bits) ->
2252-
%% The `bs_match` instruction targeting this block on failure
2253-
%% will only fail when the end of the binary has been reached.
2254-
%% There is no need for the test.
2255-
Last = beam_ssa:normalize(Last0#b_br{fail=Succ}),
2256-
{reverse(Acc, Is),Last,Map0};
2257-
#{} ->
2258-
{reverse(Acc, Is),Last0,Map0}
2259-
end;
2260-
bsm_tail_is([#b_set{}=I|Is], Last, L, Map, Acc) ->
2261-
bsm_tail_is(Is, Last, L, Map, [I|Acc]);
2262-
bsm_tail_is([], Last, _L, Map0, Acc) ->
2263-
Map = foldl(fun(F, A) ->
2264-
A#{F => unknown}
2265-
end, Map0, beam_ssa:successors(#b_blk{is=[],last=Last})),
2266-
{reverse(Acc),Last,Map}.
2267-
2268-
bsm_tail_num_matched([#b_literal{val=skip},Ctx,Type,Flags,Size,Unit], Map) ->
2269-
bsm_tail_num_matched([Type,Ctx,Flags,Size,Unit], Map);
2270-
bsm_tail_num_matched([#b_literal{val=Type},Ctx,#b_literal{},
2271-
#b_literal{val=Size},#b_literal{val=Unit}], Map)
2215+
bsm_tail_is_1([#b_set{op=bs_match,anno=Anno,dst=Dst,args=[_, Ctx | _]=Args},
2216+
#b_set{op={succeeded,guard},dst=SuccDst,args=[Dst]}],
2217+
#b_blk{last=#b_br{bool=SuccDst,succ=Succ,fail=Fail}=Last}=Blk0,
2218+
L, Tags) ->
2219+
case {Tags, bsm_tail_match_tag(Args, Anno)} of
2220+
{#{ L := Ctx }, Ctx} ->
2221+
%% This block can only be reached through matches that fail because
2222+
%% the context is empty, and the current match will likewise only
2223+
%% fail because the context is empty, so we KNOW that this cannot
2224+
%% succeed.
2225+
%%
2226+
%% Kill the instruction and propagate the condition.
2227+
Blk = Blk0#b_blk{last=beam_ssa:normalize(Last#b_br{succ=Fail})},
2228+
{Blk, bsm_tail_update_target(Fail, Fail, Ctx, Tags)};
2229+
{#{ L := _ }, Tag} ->
2230+
%% `any` or different context. Mark the fail block with whether
2231+
%% it's reachable solely because the context is empty.
2232+
{Blk0, bsm_tail_update_target(Succ, Fail, Tag, Tags)}
2233+
end;
2234+
bsm_tail_is_1([#b_set{op=bs_test_tail,args=[Ctx,#b_literal{val=Size}],dst=Dst}],
2235+
#b_blk{last=#b_br{bool=Dst,succ=Succ,fail=Fail}=Last0}=Blk0,
2236+
L, Tags) ->
2237+
true = is_integer(Size) andalso Size >= 0, %Assertion.
2238+
case Tags of
2239+
#{ L := Ctx } ->
2240+
%% This block can only be reached through matches that fail because
2241+
%% the end of the context has been reached.
2242+
%%
2243+
%% Kill the instruction and propagate the condition.
2244+
Next = case Size of
2245+
0 -> Succ;
2246+
_ -> Fail
2247+
end,
2248+
Last = beam_ssa:normalize(Last0#b_br{succ=Next,fail=Next}),
2249+
Blk = Blk0#b_blk{last=Last},
2250+
{Blk, bsm_tail_update_target(Next, Next, Ctx, Tags)};
2251+
#{ L := _ } ->
2252+
%% `any` or different context. We cannot optimize this, but it's
2253+
%% safe to mark the success block as only being reachable when the
2254+
%% context is empty.
2255+
Tag = case Size of
2256+
0 -> Ctx;
2257+
_ -> any
2258+
end,
2259+
{Blk0, bsm_tail_update_target(Fail, Succ, Tag, Tags)}
2260+
end;
2261+
bsm_tail_is_1([#b_set{} | Is], Blk, L, Tags) ->
2262+
bsm_tail_is_1(Is, Blk, L, Tags);
2263+
bsm_tail_is_1([], Blk, _L, Tags0) ->
2264+
Tags = foldl(fun(Lbl, Acc) ->
2265+
Acc#{ Lbl => any }
2266+
end, Tags0, beam_ssa:successors(Blk)),
2267+
{Blk, Tags}.
2268+
2269+
bsm_tail_match_tag([#b_literal{val=skip}, Ctx, Type | Rest], Anno) ->
2270+
bsm_tail_match_tag([Type, Ctx | Rest], Anno);
2271+
bsm_tail_match_tag([#b_literal{val=Type},
2272+
#b_var{}=Ctx,
2273+
#b_literal{},
2274+
#b_literal{val=Size},
2275+
#b_literal{val=Unit}],
2276+
Anno)
22722277
when (Type =:= integer orelse Type =:= binary),
22732278
is_integer(Size), is_integer(Unit) ->
22742279
Bits = Size * Unit,
2275-
case Map of
2276-
#{Ctx := Bits} when is_integer(Bits) ->
2277-
Bits;
2280+
case Anno of
2281+
#{ arg_types := #{ 1 := CtxType } } ->
2282+
case beam_types:get_bs_matchable_unit(CtxType) of
2283+
Bits -> Ctx;
2284+
_ -> any
2285+
end;
22782286
#{} ->
2279-
unknown
2280-
end;
2281-
bsm_tail_num_matched(_Args, _Map) ->
2282-
unknown.
2287+
any
2288+
end;
2289+
bsm_tail_match_tag(_Args, _Anno) ->
2290+
any.
2291+
2292+
bsm_tail_update_target(Succ, Fail, Tag, Tags) when Succ =/= Fail ->
2293+
bsm_tail_update_target(Fail, Fail, Tag, Tags#{ Succ => any });
2294+
bsm_tail_update_target(Same, Same, Tag, Tags) ->
2295+
case Tags of
2296+
#{ Same := Tag } -> Tags;
2297+
#{ Same := _ } -> Tags#{ Same => any };
2298+
#{} -> Tags#{ Same => Tag }
2299+
end.
22832300

22842301
%%%
22852302
%%% Optimize binary construction.

lib/compiler/src/beam_validator.erl

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -915,11 +915,13 @@ vi({test,bs_skip_bits2,{f,Fail},[Ctx,Size0,Unit,_Flags]}, Vst) ->
915915
end,
916916

917917
validate_bs_skip(Fail, Ctx, Stride, Vst);
918-
vi({test,bs_test_tail2,{f,Fail},[Ctx,_Size]}, Vst) ->
918+
vi({test,bs_test_tail2,{f,Fail},[Ctx0,_Size]}, Vst) ->
919+
Ctx = unpack_typed_arg(Ctx0, Vst),
919920
assert_no_exception(Fail),
920921
assert_type(#t_bs_context{}, Ctx, Vst),
921922
branch(Fail, Vst);
922-
vi({test,bs_test_unit,{f,Fail},[Ctx,Unit]}, Vst) ->
923+
vi({test,bs_test_unit,{f,Fail},[Ctx0,Unit]}, Vst) ->
924+
Ctx = unpack_typed_arg(Ctx0, Vst),
923925
assert_type(#t_bs_context{}, Ctx, Vst),
924926

925927
Type = #t_bs_context{tail_unit=Unit},
@@ -1849,7 +1851,8 @@ validate_bs_get_1(Fail, Ctx, Live, Vst, SuccFun) ->
18491851
validate_bs_skip(Fail, Ctx, Stride, Vst) ->
18501852
validate_bs_skip(Fail, Ctx, Stride, no_live, Vst).
18511853

1852-
validate_bs_skip(Fail, Ctx, Stride, Live, Vst) ->
1854+
validate_bs_skip(Fail, Ctx0, Stride, Live, Vst) ->
1855+
Ctx = unpack_typed_arg(Ctx0, Vst),
18531856
assert_no_exception(Fail),
18541857

18551858
assert_type(#t_bs_context{}, Ctx, Vst),

lib/compiler/test/bs_match_SUITE.erl

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2533,6 +2533,8 @@ empty_matches(Config) when is_list(Config) ->
25332533
{'EXIT',{function_clause,[_|_]}} = catch em_4(<<0:1>>, <<>>),
25342534
{'EXIT',{function_clause,[_|_]}} = catch em_4(<<0:1>>, <<0:1>>),
25352535

2536+
ok = em_5(),
2537+
25362538
ok.
25372539

25382540
em_1(Bytes) ->
@@ -2560,6 +2562,75 @@ em_3_1(I) -> I.
25602562
em_4(<<X:0, _:X>>, <<Y:0, _:Y>>) ->
25612563
ok.
25622564

2565+
%% GH-10047
2566+
2567+
em_5() ->
2568+
A = id(<<1>>),
2569+
Empty = id(<<>>),
2570+
<<Zero>> = id(<<0>>),
2571+
2572+
true = is_bitstring(A),
2573+
true = is_bitstring(Empty),
2574+
2575+
<<1,128>> = em_5_1(A),
2576+
<<0>> = em_5_1(id(Empty)),
2577+
2578+
true = is_binary(A),
2579+
true = is_binary(Empty),
2580+
2581+
ok = em_5_coverage_1(A),
2582+
ok = em_5_coverage_1(Empty),
2583+
2584+
ok = em_5_coverage_2(A),
2585+
ok = em_5_coverage_2(Empty),
2586+
2587+
ok = em_5_coverage_3(A),
2588+
ok = em_5_coverage_3(Empty),
2589+
2590+
ok = em_5_coverage_4(A, Zero),
2591+
ok = em_5_coverage_4(Empty, Zero),
2592+
2593+
ok.
2594+
2595+
em_5_1(<<Chunk:7/bits>>) ->
2596+
<<Chunk:7/bits, 0:1>>;
2597+
em_5_1(<<Chunk:1/bits>>) ->
2598+
<<Chunk:1/bits, 0:7>>;
2599+
em_5_1(<<>>) ->
2600+
<<0:8>>;
2601+
em_5_1(<<Chunk:7/bits, Rest/bits>>) ->
2602+
<<Chunk:7/bits, 1:1, (em_5_1(Rest))/bits>>.
2603+
2604+
%% Improves coverage.
2605+
em_5_coverage_1(<<_:8/bits, Rest/binary>>) ->
2606+
em_5_coverage_1(Rest);
2607+
em_5_coverage_1(<<_:8/integer, _/binary>>) ->
2608+
unreachable;
2609+
em_5_coverage_1(<<>>) ->
2610+
ok.
2611+
2612+
em_5_coverage_2(<<_:8/bits, Rest/binary>>) ->
2613+
em_5_coverage_2(Rest);
2614+
em_5_coverage_2(<<_:8/integer, _/binary>>) ->
2615+
unreachable;
2616+
em_5_coverage_2(<<>>) ->
2617+
ok.
2618+
2619+
em_5_coverage_3(<<_:8/bits, Rest/binary>>) ->
2620+
em_5_coverage_3(Rest);
2621+
em_5_coverage_3(<<_>>) ->
2622+
unreachable;
2623+
em_5_coverage_3(<<>>) ->
2624+
ok.
2625+
2626+
em_5_coverage_4(<<_:8/bits, Rest/binary>>, TailBits) ->
2627+
em_5_coverage_4(Rest, TailBits);
2628+
em_5_coverage_4(Rest, TailBits) ->
2629+
case Rest of
2630+
<<_:TailBits/bits>> -> ok;
2631+
<<>> -> error
2632+
end.
2633+
25632634
%% beam_trim would sometimes crash when bs_start_match4 had {atom,resume} as
25642635
%% its fail label.
25652636
trim_bs_start_match_resume(Config) when is_list(Config) ->

0 commit comments

Comments
 (0)