diff --git a/bootstrap/lib/compiler/ebin/compiler.app b/bootstrap/lib/compiler/ebin/compiler.app index b4051ad3a770..134ea1fda509 100644 --- a/bootstrap/lib/compiler/ebin/compiler.app +++ b/bootstrap/lib/compiler/ebin/compiler.app @@ -50,6 +50,7 @@ beam_ssa_private_append, beam_ssa_recv, beam_ssa_share, + beam_ssa_ss, beam_ssa_throw, beam_ssa_type, beam_trim, diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index 2ca71218d81a..5ca5cb28076c 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -373,6 +373,7 @@ atom info_trap atom inherit atom init atom initial_call +atom inplace atom input atom integer atom internal diff --git a/erts/emulator/beam/emu/generators.tab b/erts/emulator/beam/emu/generators.tab index d2c0f5974070..fa557e375aa7 100644 --- a/erts/emulator/beam/emu/generators.tab +++ b/erts/emulator/beam/emu/generators.tab @@ -1026,15 +1026,20 @@ gen.create_bin(Fail, Alloc, Live, Unit, Dst, N, Segments) { return op; } -gen.update_record(Size, Src, Dst, N, Updates) { +gen.update_record(Hint, Size, Src, Dst, N, Updates) { BeamOp *begin, *prev; Sint count, i; + ASSERT(Hint.type == TAG_a); ASSERT(Size.type == TAG_u && Size.val < SCRATCH_X_REG); ASSERT(N.type == TAG_u && !(N.val % 2) && (N.val / 2) <= Size.val); $NewBeamOp(S, begin); - $BeamOpNameArity(begin, i_update_record, 5); + if (Hint.val == am_inplace) { + $BeamOpNameArity(begin, i_update_record_in_place, 5); + } else { + $BeamOpNameArity(begin, i_update_record_copy, 5); + } begin->a[0] = Size; begin->a[1] = Src; @@ -1047,6 +1052,7 @@ gen.update_record(Size, Src, Dst, N, Updates) { for (i = 2; i < count; i += 2) { BeamOp *next; + int same_reg; $NewBeamOp(S, next); $BeamOpNameArity(next, i_update_record_continue, 2); @@ -1056,10 +1062,19 @@ gen.update_record(Size, Src, Dst, N, Updates) { next->a[0].type = TAG_u; next->a[0].val = (Size.val + 1) - Updates[i].val; + if (Updates[i + 1].type != Dst.type) { + same_reg = 0; + } else if (Dst.type == TAG_x || 
Dst.type == TAG_y) { + /* We must not compare the type indices (if any). */ + same_reg = (Updates[i + 1].val & REG_MASK) == (Dst.val & REG_MASK); + } else { + same_reg = 1; + } + /* The first instruction overwrites the destination register after * stashing its contents to SCRATCH_X_REG, so all updates must be * rewritten accordingly. */ - if (Updates[i + 1].type == Dst.type && Updates[i + 1].val == Dst.val) { + if (same_reg) { next->a[1].type = TAG_x; next->a[1].val = SCRATCH_X_REG; } else { @@ -1072,6 +1087,16 @@ gen.update_record(Size, Src, Dst, N, Updates) { prev = next; } + if (Hint.val == am_inplace) { + BeamOp *next; + + $NewBeamOp(S, next); + $BeamOpNameArity(next, i_update_record_in_place_done, 0); + + next->next = NULL; + prev->next = next; + } + return begin; } diff --git a/erts/emulator/beam/emu/instrs.tab b/erts/emulator/beam/emu/instrs.tab index 5238fdd5dcec..ee27f19a0126 100644 --- a/erts/emulator/beam/emu/instrs.tab +++ b/erts/emulator/beam/emu/instrs.tab @@ -782,7 +782,7 @@ self(Dst) { $Dst = c_p->common.id; } -i_update_record(Size, Src, Dst, Offset, Element) { +i_update_record_copy(Size, Src, Dst, Offset, Element) { Eterm *untagged_source = tuple_val($Src); Uint size_on_heap = $Size + 1; @@ -801,11 +801,47 @@ i_update_record(Size, Src, Dst, Offset, Element) { HTOP += size_on_heap; } +i_update_record_in_place(Size, Src, Dst, Offset, Element) { + Eterm *untagged_source = tuple_val($Src); + Uint size_on_heap = $Size + 1; + + if (c_p->high_water <= untagged_source && untagged_source < HTOP) { + /* It is safe to overwrite the old record. */ + LIGHT_SWAPOUT; + + untagged_source[$Offset] = $Element; + + /* We stash the contents of the destination register in SCRATCH_X_REG in + * case it's used in subsequent `i_update_record_continue` instructions. + * The updates have been rewritten accordingly. 
*/ + reg[SCRATCH_X_REG] = $Dst; + $Dst = $Src; + + HTOP = untagged_source + size_on_heap; + } else { + /* It would be unsafe to overwrite the old record, because + * that could cause a term on the old generation to point to + * the young generation. */ + sys_memcpy(HTOP, untagged_source, size_on_heap * sizeof(Eterm)); + HTOP[$Offset] = $Element; + + reg[SCRATCH_X_REG] = $Dst; + $Dst = make_tuple(HTOP); + + HTOP += size_on_heap; + LIGHT_SWAPOUT; + } +} + i_update_record_continue(OffsetFromEnd, Element) { Sint offset = -(Sint)$OffsetFromEnd; HTOP[offset] = $Element; } +i_update_record_in_place_done() { + LIGHT_SWAPIN; +} + set_tuple_element(Element, Tuple, Offset) { Eterm* p; diff --git a/erts/emulator/beam/emu/ops.tab b/erts/emulator/beam/emu/ops.tab index 2b08dd60a972..c37acac11759 100644 --- a/erts/emulator/beam/emu/ops.tab +++ b/erts/emulator/beam/emu/ops.tab @@ -1730,9 +1730,18 @@ recv_marker_use S # update_record Hint=a Size=u Src=s Dst=d N=u Updates=* => - update_record(Size, Src, Dst, N, Updates) + update_record(Hint, Size, Src, Dst, N, Updates) + +i_update_record_copy Size=u Src=c Dst=xy Offset=u Element=s => + move Src x | i_update_record_copy Size x Dst Offset Element + +i_update_record_in_place Size=u Src=c Dst=xy Offset=u Element=s => + move Src x | i_update_record_in_place Size x Dst Offset Element + +i_update_record_copy t xy xy t s +i_update_record_in_place t xy xy t s -i_update_record Size=u Src=c Dst=xy Offset=u Element=s => - move Src x | i_update_record Size x Dst Offset Element -i_update_record t xy xy t s i_update_record_continue t s + +i_update_record_in_place_done + diff --git a/erts/emulator/beam/jit/arm/beam_asm.hpp b/erts/emulator/beam/jit/arm/beam_asm.hpp index 96fc00656037..7db73c6a1986 100644 --- a/erts/emulator/beam/jit/arm/beam_asm.hpp +++ b/erts/emulator/beam/jit/arm/beam_asm.hpp @@ -1736,8 +1736,24 @@ class BeamModuleAssembler : public BeamAssembler, a.cmp(gp, tmp.reg); } + void safe_str(a64::Gp gp, arm::Mem mem) { + size_t
abs_offset = std::abs(mem.offset()); + auto offset = mem.offset(); + + ASSERT(mem.hasBaseReg() && !mem.hasIndex()); + ASSERT(gp.isGpX()); + + if (abs_offset <= sizeof(Eterm) * MAX_LDR_STR_DISPLACEMENT) { + a.str(gp, mem); + } else { + add(SUPER_TMP, a64::GpX(mem.baseId()), offset); + a.str(gp, a64::Mem(SUPER_TMP)); + } + } + void safe_stp(a64::Gp gp1, a64::Gp gp2, + const ArgVal &Dst1, const ArgVal &Dst2) { ASSERT(ArgVal::memory_relation(Dst1, Dst2) == diff --git a/erts/emulator/beam/jit/arm/instr_common.cpp b/erts/emulator/beam/jit/arm/instr_common.cpp index ca818c8f610c..8f8384fa428a 100644 --- a/erts/emulator/beam/jit/arm/instr_common.cpp +++ b/erts/emulator/beam/jit/arm/instr_common.cpp @@ -991,6 +991,88 @@ void BeamModuleAssembler::emit_update_record(const ArgAtom &Hint, flush_var(destination); } +void BeamModuleAssembler::emit_update_record_in_place( + const ArgWord &TupleSize, + const ArgSource &Src, + const ArgRegister &Dst, + const ArgWord &UpdateCount, + const Span &updates) { + bool all_safe = true; + ArgSource maybe_immediate = ArgNil(); + const size_t size_on_heap = TupleSize.get() + 1; + + ASSERT(UpdateCount.get() == updates.size()); + ASSERT((UpdateCount.get() % 2) == 0); + + ASSERT(size_on_heap > 2); + + auto destination = init_destination(Dst, ARG1); + auto src = load_source(Src, ARG2); + + a64::Gp untagged_src = ARG3; + emit_untag_ptr(untagged_src, src.reg); + + for (size_t i = 0; i < updates.size(); i += 2) { + const auto &value = updates[i + 1].as(); + if (!(always_immediate(value) || value.isLiteral())) { + all_safe = false; + if (maybe_immediate.isNil() && + always_one_of(value)) { + maybe_immediate = value; + } else { + maybe_immediate = ArgNil(); + break; + } + } + } + + if (all_safe) { + comment("skipped copy fallback because all new values are safe"); + } else { + Label update = a.newLabel(); + + if (!maybe_immediate.isNil()) { + auto value = load_source(maybe_immediate, ARG5); + emit_is_not_boxed(update, value.reg); + } + + a.ldr(ARG4, 
arm::Mem(c_p, offsetof(Process, high_water))); + a.cmp(untagged_src, HTOP); + a.ccmp(untagged_src, ARG4, imm(NZCV::kNone), imm(arm::CondCode::kLO)); + a.b_hs(update); + + emit_copy_words_increment(untagged_src, HTOP, size_on_heap); + sub(untagged_src, HTOP, size_on_heap * sizeof(Eterm)); + + a.bind(update); + } + + for (size_t i = 0; i < updates.size(); i += 2) { + const auto next_index = updates[i].as().get(); + const auto &next_value = updates[i + 1].as(); + arm::Mem mem(untagged_src, next_index * sizeof(Eterm)); + + if (i + 2 < updates.size()) { + const auto adjacent_index = updates[i + 2].as().get(); + const auto &adjacent_value = updates[i + 3].as(); + + if (adjacent_index == next_index + 1) { + auto [first, second] = + load_sources(next_value, TMP1, adjacent_value, TMP2); + safe_stp(first.reg, second.reg, mem); + i += 2; + continue; + } + } + + auto value = load_source(next_value, TMP1); + safe_str(value.reg, mem); + } + + a.add(destination.reg, untagged_src, TAG_PRIMARY_BOXED); + flush_var(destination); +} + void BeamModuleAssembler::emit_set_tuple_element(const ArgSource &Element, const ArgRegister &Tuple, const ArgWord &Offset) { diff --git a/erts/emulator/beam/jit/arm/ops.tab b/erts/emulator/beam/jit/arm/ops.tab index a5c98b27ca5e..7cccc12d8a6d 100644 --- a/erts/emulator/beam/jit/arm/ops.tab +++ b/erts/emulator/beam/jit/arm/ops.tab @@ -1484,4 +1484,8 @@ i_lambda_trampoline F f W W # OTP 26 # +update_record a==am_inplace Size Src=d Dst N Updates=* => + update_record_in_place Size Src Dst N Updates + update_record a I s d I * +update_record_in_place I s d I * diff --git a/erts/emulator/beam/jit/x86/instr_common.cpp b/erts/emulator/beam/jit/x86/instr_common.cpp index 53cc9bd4bc45..fb3fe4d1524a 100644 --- a/erts/emulator/beam/jit/x86/instr_common.cpp +++ b/erts/emulator/beam/jit/x86/instr_common.cpp @@ -984,6 +984,102 @@ void BeamModuleAssembler::emit_update_record(const ArgAtom &Hint, mov_arg(Dst, RET); } +void 
BeamModuleAssembler::emit_update_record_in_place( + const ArgWord &TupleSize, + const ArgSource &Src, + const ArgRegister &Dst, + const ArgWord &UpdateCount, + const Span &updates) { + bool all_safe = true; + ArgSource maybe_immediate = ArgNil(); + const size_t size_on_heap = TupleSize.get() + 1; + + ASSERT(UpdateCount.get() == updates.size()); + ASSERT((UpdateCount.get() % 2) == 0); + + ASSERT(size_on_heap > 2); + + for (size_t i = 0; i < updates.size(); i += 2) { + const auto &value = updates[i + 1].as(); + if (!(always_immediate(value) || value.isLiteral())) { + all_safe = false; + if (maybe_immediate.isNil() && + always_one_of(value)) { + maybe_immediate = value; + } else { + maybe_immediate = ArgNil(); + break; + } + } + } + + x86::Gp tagged_ptr = RET; + + mov_arg(tagged_ptr, Src); + +#if defined(DEBUG) && defined(TAG_LITERAL_PTR) + /* The compiler guarantees that the tuple is not a literal. */ + { + Label not_literal = a.newLabel(); + + a.test(tagged_ptr, imm(TAG_LITERAL_PTR)); + a.short_().je(not_literal); + a.ud2(); + + a.bind(not_literal); + } +#endif + + if (all_safe) { + comment("skipped copy fallback because all new values are safe"); + } else { + Label update = a.newLabel(); + + if (!maybe_immediate.isNil()) { + mov_arg(ARG4, maybe_immediate); + preserve_cache([&]() { + emit_is_not_boxed(update, ARG4, dShort); + }); + } + + preserve_cache( + [&]() { + Label copy = a.newLabel(); + + a.mov(ARG1, x86::Mem(c_p, offsetof(Process, high_water))); + a.cmp(tagged_ptr, HTOP); + a.short_().jae(copy); + + a.cmp(tagged_ptr, ARG1); + a.short_().jae(update); + + a.bind(copy); + emit_copy_words(emit_boxed_val(tagged_ptr, 0), + x86::qword_ptr(HTOP, 0), + size_on_heap, + ARG1); + a.lea(RET, x86::qword_ptr(HTOP, TAG_PRIMARY_BOXED)); + a.add(HTOP, imm(size_on_heap * sizeof(Eterm))); + + a.bind(update); + }, + ARG1); + } + + for (size_t i = 0; i < updates.size(); i += 2) { + const auto next_index = updates[i].as().get(); + const auto &next_value = updates[i + 1].as(); + + 
ASSERT(next_index > 0); + + mov_arg(emit_boxed_val(RET, next_index * sizeof(Eterm)), + next_value, + ARG1); + } + + mov_arg(Dst, RET); +} + void BeamModuleAssembler::emit_set_tuple_element(const ArgSource &Element, const ArgRegister &Tuple, const ArgWord &Offset) { diff --git a/erts/emulator/beam/jit/x86/ops.tab b/erts/emulator/beam/jit/x86/ops.tab index 8969eba81ba9..7f52f42f6c60 100644 --- a/erts/emulator/beam/jit/x86/ops.tab +++ b/erts/emulator/beam/jit/x86/ops.tab @@ -1413,4 +1413,7 @@ recv_marker_use S # OTP 26 # +update_record a==am_inplace Size Src=d Dst N Updates=* => + update_record_in_place Size Src Dst N Updates update_record a I s d I * +update_record_in_place I s d I * diff --git a/lib/compiler/src/Makefile b/lib/compiler/src/Makefile index 297f6b1253a5..df90d7458ede 100644 --- a/lib/compiler/src/Makefile +++ b/lib/compiler/src/Makefile @@ -69,13 +69,14 @@ MODULES = \ beam_ssa_check \ beam_ssa_codegen \ beam_ssa_dead \ + beam_ssa_destructive_update \ beam_ssa_lint \ beam_ssa_opt \ beam_ssa_pp \ beam_ssa_pre_codegen \ - beam_ssa_private_append \ beam_ssa_recv \ beam_ssa_share \ + beam_ssa_ss \ beam_ssa_throw \ beam_ssa_type \ beam_trim \ @@ -221,12 +222,14 @@ $(EBIN)/beam_ssa_bool.beam: beam_ssa.hrl $(EBIN)/beam_ssa_check.beam: beam_ssa.hrl beam_types.hrl $(EBIN)/beam_ssa_codegen.beam: beam_ssa.hrl beam_asm.hrl $(EBIN)/beam_ssa_dead.beam: beam_ssa.hrl +$(EBIN)/beam_ssa_destructive_update.beam: beam_ssa_opt.hrl beam_types.hrl $(EBIN)/beam_ssa_lint.beam: beam_ssa.hrl $(EBIN)/beam_ssa_opt.beam: beam_ssa.hrl $(EBIN)/beam_ssa_pp.beam: beam_ssa.hrl beam_types.hrl $(EBIN)/beam_ssa_pre_codegen.beam: beam_ssa.hrl beam_asm.hrl $(EBIN)/beam_ssa_recv.beam: beam_ssa.hrl $(EBIN)/beam_ssa_share.beam: beam_ssa.hrl +$(EBIN)/beam_ssa_ss.beam: beam_ssa.hrl beam_types.hrl $(EBIN)/beam_ssa_throw.beam: beam_ssa.hrl beam_types.hrl $(EBIN)/beam_ssa_type.beam: beam_ssa.hrl beam_types.hrl $(EBIN)/beam_trim.beam: beam_asm.hrl diff --git a/lib/compiler/src/beam_digraph.erl 
b/lib/compiler/src/beam_digraph.erl index 5d68b25f72bf..6ebbbb25b81b 100644 --- a/lib/compiler/src/beam_digraph.erl +++ b/lib/compiler/src/beam_digraph.erl @@ -30,9 +30,11 @@ -export([new/0, add_vertex/2, add_vertex/3, add_edge/3, add_edge/4, del_edge/2, del_edges/2, + foldv/3, has_vertex/2, is_path/3, in_degree/2, in_edges/2, in_neighbours/2, + no_vertices/1, out_degree/2, out_edges/2, out_neighbours/2, vertex/2, vertices/1, reverse_postorder/2, @@ -70,19 +72,9 @@ add_vertex(Dg, V) -> -spec add_vertex(graph(), vertex(), label()) -> graph(). add_vertex(Dg, V, Label) -> - #dg{in_es=InEsMap0,out_es=OutEsMap0,vs=Vs0} = Dg, - InEsMap = init_edge_map(V, InEsMap0), - OutEsMap = init_edge_map(V, OutEsMap0), + #dg{vs=Vs0} = Dg, Vs = Vs0#{V=>Label}, - Dg#dg{vs=Vs,in_es=InEsMap,out_es=OutEsMap}. - -init_edge_map(V, EsMap) -> - case is_map_key(V, EsMap) of - true -> - EsMap; - false -> - EsMap#{V=>ordsets:new()} - end. + Dg#dg{vs=Vs}. -spec add_edge(graph(), vertex(), vertex()) -> graph(). add_edge(Dg, From, To) -> @@ -97,9 +89,9 @@ add_edge(Dg, From, To, Label) -> Dg#dg{in_es=InEsMap,out_es=OutEsMap}. edge_map_add(V, E, EsMap) -> - Es0 = map_get(V, EsMap), + Es0 = maps:get(V, EsMap, []), Es = ordsets:add_element(E, Es0), - EsMap#{V:=Es}. + EsMap#{V=>Es}. -spec del_edge(graph(), edge()) -> graph(). del_edge(Dg, {From,To,_}=E) -> @@ -109,7 +101,7 @@ del_edge(Dg, {From,To,_}=E) -> Dg#dg{in_es=InEsMap,out_es=OutEsMap}. edge_map_del(V, E, EsMap) -> - Es0 = map_get(V, EsMap), + Es0 = maps:get(V, EsMap, []), Es = Es0 -- [E], - EsMap#{V:=Es}. + EsMap#{V=>Es}. @@ -117,21 +109,26 @@ edge_map_del(V, E, EsMap) -> del_edges(G, Es) when is_list(Es) -> foldl(fun(E, A) -> del_edge(A, E) end, G, Es). +%% Fold over the vertices of the graph, the order is unspecified. +-spec foldv(graph(), fun((vertex(), label(), any()) -> any()), any()) -> any(). +foldv(#dg{vs=Vs}, Fun, Acc) -> + maps:fold(Fun, Acc, Vs). + -spec has_vertex(graph(), vertex()) -> boolean(). has_vertex(#dg{vs=Vs}, V) -> is_map_key(V, Vs).
-spec in_degree(graph(), vertex()) -> non_neg_integer(). in_degree(#dg{in_es=InEsMap}, V) -> - length(map_get(V, InEsMap)). + length(maps:get(V, InEsMap, [])). -spec in_edges(graph(), vertex()) -> [edge()]. in_edges(#dg{in_es=InEsMap}, V) -> - map_get(V, InEsMap). + maps:get(V, InEsMap, []). -spec in_neighbours(graph(), vertex()) -> [vertex()]. in_neighbours(#dg{in_es=InEsMap}, V) -> - [From || {From,_,_} <- map_get(V, InEsMap)]. + [From || {From,_,_} <- maps:get(V, InEsMap, [])]. -spec is_path(graph(), vertex(), vertex()) -> boolean(). is_path(G, From, To) -> @@ -161,15 +158,19 @@ is_path_1([], _To, _G, Seen) -> -spec out_degree(graph(), vertex()) -> non_neg_integer(). out_degree(#dg{out_es=OutEsMap}, V) -> - length(map_get(V, OutEsMap)). + length(maps:get(V, OutEsMap, [])). -spec out_edges(graph(), vertex()) -> [edge()]. out_edges(#dg{out_es=OutEsMap}, V) -> - map_get(V, OutEsMap). + maps:get(V, OutEsMap, []). -spec out_neighbours(graph(), vertex()) -> [vertex()]. out_neighbours(#dg{out_es=OutEsMap}, V) -> - [To || {_,To,_} <- map_get(V, OutEsMap)]. + [To || {_,To,_} <- maps:get(V, OutEsMap, [])]. + +-spec no_vertices(graph()) -> non_neg_integer(). +no_vertices(#dg{vs=Vs}) -> + map_size(Vs). -spec vertex(graph(), vertex()) -> label(). vertex(#dg{vs=Vs}, V) -> diff --git a/lib/compiler/src/beam_ssa_alias.erl b/lib/compiler/src/beam_ssa_alias.erl index 925fa78bc7f5..296907de17f9 100644 --- a/lib/compiler/src/beam_ssa_alias.erl +++ b/lib/compiler/src/beam_ssa_alias.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2023. All Rights Reserved. +%% Copyright Ericsson AB 2024. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ -export([opt/2]). --import(lists, [foldl/3, reverse/1, zip/2]). +-import(lists, [foldl/3, reverse/1]). %% The maximum number of iterations when calculating alias %% information. 
@@ -40,50 +40,25 @@ -ifdef(DEBUG). -define(DP(FMT, ARGS), io:format(FMT, ARGS)). -define(DP(FMT), io:format(FMT)). +-define(DBG(STMT), STMT). -else. -define(DP(FMT, ARGS), skip). -define(DP(FMT), skip). +-define(DBG(STMT), skip). -endif. -%% Uncomment the following to get trace printouts when states are -%% merged. - -%% -define(TRACE_MERGE, true). - --ifdef(TRACE_MERGE). --define(TM_DP(FMT, ARGS), io:format(FMT, ARGS)). --define(TM_DP(FMT), io:format(FMT)). --else. --define(TM_DP(FMT, ARGS), skip). --define(TM_DP(FMT), skip). --endif. - -%% Uncomment the following to check that all invariants for the state -%% hold when a state are and has been updated. These checks are -%% expensive and not enabled by default. - -%% -define(EXTRA_ASSERTS, true). - --ifdef(EXTRA_ASSERTS). --define(aa_assert_ss(SS), aa_assert_ss(SS)). --define(ASSERT(Assert), Assert). --else. --define(aa_assert_ss(SS), SS). --define(ASSERT(Assert), skip). --endif. - --type call_args_status_map() :: #{ #b_local{} => ['aliased' | 'unique'] }. - %% Alias analysis state -record(aas, { caller :: func_id() | 'undefined', - call_args = #{} :: call_args_status_map(), + call_args = #{}, alias_map = #{} :: alias_map(), func_db :: func_info_db(), kills :: kills_map(), st_map :: st_map(), orig_st_map :: st_map(), - repeats = sets:new([{version,2}]) :: sets:set(func_id()) + repeats = sets:new([{version,2}]) :: sets:set(func_id()), + %% The next unused variable name in caller + cnt = 0 :: non_neg_integer() }). %% A code location refering to either the #b_set{} defining a variable @@ -102,20 +77,9 @@ -type lbl2ss() :: #{ beam_ssa:label() => sharing_state() }. -%% The sharing state for a variable. 
--record(vas, { - status :: 'unique' | 'aliased' | 'as_parent', - parents = [] :: ordsets:ordset(#b_var{}), - child = none :: #b_var{} | 'none', - extracted = [] :: ordsets:ordset(#b_var{}), - tuple_elems = [] :: ordsets:ordset({non_neg_integer(),#b_var{}}), - pair_elems = none :: 'none' - | {'hd',#b_var{}} - | {'tl',#b_var{}} - | {'both',#b_var{},#b_var{}} - }). +-type sharing_state() :: any(). % A beam_digraph graph. --type sharing_state() :: #{ #b_var{} => #vas{} }. +-type type_db() :: #{ beam_ssa:b_var() := type() }. %%% %%% Optimization pass which calculates the alias status of values and @@ -127,8 +91,11 @@ opt(StMap0, FuncDb0) -> %% Ignore functions which are not in the function db (never %% called). Funs = [ F || F <- maps:keys(StMap0), is_map_key(F, FuncDb0)], - KillsMap = killsets(Funs, StMap0), - aa(Funs, KillsMap, StMap0, FuncDb0). + StMap1 = #{ F=>expand_record_update(OptSt) || F:=OptSt <- StMap0}, + KillsMap = killsets(Funs, StMap1), + {StMap2, FuncDb} = aa(Funs, KillsMap, StMap1, FuncDb0), + StMap = #{ F=>restore_update_record(OptSt) || F:=OptSt <- StMap2}, + {StMap, FuncDb}. %%% %%% Calculate the set of variables killed at each instruction. The @@ -172,21 +139,19 @@ killsets_blk(Lbl, #b_blk{is=Is0,last=L}=Blk, LiveIns0, Kills0, PhiLiveIns) -> LiveIns = LiveIns0#{Lbl=>Live}, {LiveIns, Kills}. -killsets_is([#b_set{op=phi,dst=Dst}|Is], Live, Kills, Lbl) -> - %% The Phi uses are logically located in the predecessors. +killsets_is([#b_set{op=phi,dst=Dst}=I|Is], Live, Kills0, Lbl) -> + %% The Phi uses are logically located in the predecessors, so we + %% don't want them live in to this block. But to correctly + %% calculate the aliasing of the arguments to the Phi in this + %% block, we need to know if the arguments live past the Phi. The + %% kill set is stored with the key {phi,Dst}. 
+ Uses = beam_ssa:used(I), + {_,LastUses} = killsets_update_live_and_last_use(Live, Uses), + Kills = killsets_add_kills({phi,Dst}, LastUses, Kills0), killsets_is(Is, sets:del_element(Dst, Live), Kills, Lbl); killsets_is([I|Is], Live0, Kills0, Lbl) -> Uses = beam_ssa:used(I), - {Live,LastUses} = - foldl(fun(Use, {LiveAcc,LastAcc}=Acc) -> - case sets:is_element(Use, LiveAcc) of - true -> - Acc; - false -> - {sets:add_element(Use, LiveAcc), - sets:add_element(Use, LastAcc)} - end - end, {Live0,sets:new([{version,2}])}, Uses), + {Live,LastUses} = killsets_update_live_and_last_use(Live0, Uses), case I of #b_set{dst=Dst} -> killsets_is(Is, sets:del_element(Dst, Live), @@ -199,6 +164,17 @@ killsets_is([I|Is], Live0, Kills0, Lbl) -> killsets_is([], Live, Kills, _) -> {Live,Kills}. +killsets_update_live_and_last_use(Live0, Uses) -> + foldl(fun(Use, {LiveAcc,LastAcc}=Acc) -> + case sets:is_element(Use, LiveAcc) of + true -> + Acc; + false -> + {sets:add_element(Use, LiveAcc), + sets:add_element(Use, LastAcc)} + end + end, {Live0,sets:new([{version,2}])}, Uses). + killsets_add_kills(Dst, LastUses, Kills) -> Kills#{Dst=>LastUses}. @@ -310,17 +286,18 @@ aa(Funs, KillsMap, StMap, FuncDb) -> %% Set up the argument info to make all incoming arguments to %% exported functions aliased and all non-exported functions %% unique. - ArgsInfo = + ArgsInfoIn = foldl( fun(F=#b_local{}, Acc) -> #func_info{exported=E,arg_types=AT} = map_get(F, FuncDb), S = case E of true -> aliased; - false -> unique + false -> no_info end, - Acc#{F=>[S || _ <- AT]} + Acc#{F=>beam_ssa_ss:initialize_in_args([S || _ <- AT])} end, #{}, Funs), - AAS = #aas{call_args=ArgsInfo,func_db=FuncDb,kills=KillsMap, + AAS = #aas{call_args=ArgsInfoIn, + func_db=FuncDb,kills=KillsMap, st_map=StMap, orig_st_map=StMap}, aa_fixpoint(Funs, AAS). 
@@ -356,7 +333,10 @@ aa_fixpoint([F|Fs], Order, OldAliasMap, OldCallArgs, AAS0=#aas{st_map=StMap}, #b_local{name=#b_literal{val=_N},arity=_A} = F, AAS1 = AAS0#aas{caller=F}, ?DP("-= ~p/~p =-~n", [_N, _A]), - AAS = aa_fun(F, map_get(F, StMap), AAS1), + St = #opt_st{ssa=_Is} = map_get(F, StMap), + ?DP("code:~n~p.~n", [_Is]), + AAS = aa_fun(F, St, AAS1), + ?DP("Done ~p/~p~n", [_N, _A]), aa_fixpoint(Fs, Order, OldAliasMap, OldCallArgs, AAS, Limit); aa_fixpoint([], Order, OldAliasMap, OldCallArgs, #aas{alias_map=OldAliasMap,call_args=OldCallArgs, @@ -383,16 +363,10 @@ aa_fun(F, #opt_st{ssa=Linear0,args=Args}, %% non-exported function, if we have call argument info in the %% AAS, we use it. For an exported function, all arguments are %% assumed to be aliased. - ArgsStatus = aa_get_call_args_status(Args, F, AAS0), - SS0 = foldl(fun({Var, Status}, Acc) -> - aa_new_ssa_var(Var, Status, Acc) - end, #{}, ArgsStatus), - ?DP("Args: ~p~n", [ArgsStatus]), + {SS0,Cnt} = aa_init_fun_ss(Args, F, AAS0), #{F:=Kills} = KillsMap, {SS,#aas{call_args=CallArgs}=AAS} = - aa_blocks(Linear0, Kills, #{0=>SS0}, AAS0), - ?DP("SS:~n~p~n~n", [SS]), - + aa_blocks(Linear0, Kills, #{0=>SS0}, AAS0#aas{cnt=Cnt}), AliasMap = AliasMap0#{ F => SS }, PrevSS = maps:get(F, AliasMap0, #{}), Repeats = case PrevSS =/= SS orelse CallArgs0 =/= CallArgs of @@ -414,10 +388,11 @@ aa_blocks([{?EXCEPTION_BLOCK,_}|Bs], Kills, Lbl2SS, AAS) -> aa_blocks(Bs, Kills, Lbl2SS, AAS); aa_blocks([{L,#b_blk{is=Is0,last=T}}|Bs0], Kills, Lbl2SS0, AAS0) -> #{L:=SS0} = Lbl2SS0, + ?DP("Block: ~p~nSS: ~p~n", [L, SS0]), {FullSS,AAS1} = aa_is(Is0, SS0, AAS0), #{{live_outs,L}:=LiveOut} = Kills, {Lbl2SS1,Successors} = aa_terminator(T, FullSS, Lbl2SS0), - PrunedSS = aa_prune_ss(FullSS, LiveOut), + PrunedSS = beam_ssa_ss:prune(LiveOut, FullSS), Lbl2SS2 = aa_add_block_entry_ss(Successors, PrunedSS, Lbl2SS1), Lbl2SS = aa_set_block_exit_ss(L, FullSS, Lbl2SS2), aa_blocks(Bs0, Kills, Lbl2SS, AAS1); @@ -425,7 +400,8 @@ aa_blocks([], _Kills, 
Lbl2SS, AAS) -> {Lbl2SS,AAS}. aa_is([I=#b_set{dst=Dst,op=Op,args=Args,anno=Anno0}|Is], SS0, AAS0) -> - SS1 = aa_new_ssa_var(Dst, unique, SS0), + ?DP("I: ~p~n", [I]), + SS1 = beam_ssa_ss:add_var(Dst, unique, SS0), {SS, AAS} = case Op of %% Instructions changing the alias status. @@ -472,16 +448,19 @@ aa_is([I=#b_set{dst=Dst,op=Op,args=Args,anno=Anno0}|Is], SS0, AAS0) -> {aa_derive_from(Dst, Arg, SS1), AAS0}; get_hd -> [Arg] = Args, - {aa_pair_extraction(Dst, Arg, hd, SS1), AAS0}; + Type = maps:get(0, maps:get(arg_types, Anno0, #{0=>any}), any), + {aa_pair_extraction(Dst, Arg, hd, Type, SS1), AAS0}; get_map_element -> [Map,_Key] = Args, {aa_map_extraction(Dst, Map, SS1, AAS0), AAS0}; get_tl -> [Arg] = Args, - {aa_pair_extraction(Dst, Arg, tl, SS1), AAS0}; + Type = maps:get(0, maps:get(arg_types, Anno0, #{0=>any}), any), + {aa_pair_extraction(Dst, Arg, tl, Type, SS1), AAS0}; get_tuple_element -> [Arg,Idx] = Args, - {aa_tuple_extraction(Dst, Arg, Idx, SS1), AAS0}; + Types = maps:get(arg_types, Anno0, #{}), + {aa_tuple_extraction(Dst, Arg, Idx, Types, SS1), AAS0}; landingpad -> {aa_set_aliased(Dst, SS1), AAS0}; make_fun -> @@ -490,19 +469,47 @@ aa_is([I=#b_set{dst=Dst,op=Op,args=Args,anno=Anno0}|Is], SS0, AAS0) -> peek_message -> {aa_set_aliased(Dst, SS1), AAS0}; phi -> - {aa_phi(Dst, Args, SS1), AAS0}; + {aa_phi(Dst, Args, Anno0, SS1, AAS0), AAS0}; put_list -> - {aa_construct_term(Dst, Args, SS1, AAS0), AAS0}; + Types = + aa_map_arg_to_type(Args, maps:get(arg_types, Anno0, #{})), + {aa_construct_pair(Dst, Args, Types, SS1, AAS0), AAS0}; put_map -> {aa_construct_term(Dst, Args, SS1, AAS0), AAS0}; put_tuple -> - {aa_construct_term(Dst, Args, SS1, AAS0), AAS0}; + Types = aa_map_arg_to_type(Args, + maps:get(arg_types, Anno0, #{})), + Values = lists:enumerate(0, Args), + {aa_construct_tuple(Dst, Values, Types, SS1, AAS0), AAS0}; update_tuple -> {aa_construct_term(Dst, Args, SS1, AAS0), AAS0}; update_record -> - [_Hint,_Size,Src|Updates] = Args, - Values = 
[Src|aa_update_record_get_vars(Updates)], - {aa_construct_term(Dst, Values, SS1, AAS0), AAS0}; + [#b_literal{val=Hint},_Size,Src|Updates] = Args, + RecordType = maps:get(arg_types, Anno0, #{}), + ?DP("UPDATE RECORD dst: ~p, src: ~p, type:~p~n", + [Dst,Src,RecordType]), + Values = aa_update_record_get_vars(Updates), + ?DP("values: ~p~n", [Values]), + Types = aa_map_arg_to_type(Args, RecordType), + ?DP("updates: ~p~n", [Updates]), + ?DP("type-mapping: ~p~n", [Types]), + SS2 = aa_construct_tuple(Dst, Values, Types, SS1, AAS0), + case Hint of + reuse -> + %% If the reuse hint is set and the source + %% doesn't die here, both Src and Dst become + %% aliased, as the VM could just leave Src + %% unchanged and move it to Dst. + KillSet = aa_killset_for_instr(Dst, AAS0), + case sets:is_element(Src, KillSet) of + true -> + {SS2,AAS0}; + false -> + {aa_set_status([Dst,Src], aliased, SS2), AAS0} + end; + copy -> + {SS2,AAS0} + end; %% Instructions which don't change the alias status {float,_} -> @@ -548,6 +555,7 @@ aa_is([I=#b_set{dst=Dst,op=Op,args=Args,anno=Anno0}|Is], SS0, AAS0) -> _ -> exit({unknown_instruction, I}) end, + ?DP("Post I: ~p.~p~n", [I, SS]), aa_is(Is, SS, AAS); aa_is([], SS, AAS) -> {SS, AAS}.
@@ -558,18 +566,18 @@ aa_terminator(#b_br{succ=S,fail=F}, _SS, Lbl2SS) -> {Lbl2SS,[S,F]}; aa_terminator(#b_ret{arg=Arg,anno=Anno0}, SS, Lbl2SS0) -> Type = maps:get(result_type, Anno0, any), - Status0 = aa_get_status(Arg, SS), - ?DP("Returned ~p:~p:~p~n", [Arg, Status0, Type]), Type2Status0 = maps:get(returns, Lbl2SS0, #{}), - Status = case Type2Status0 of - #{ Type := OtherStatus } -> - aa_meet(Status0, OtherStatus); - #{ } -> - Status0 - end, + Status0 = case Type2Status0 of + #{ Type := OtherStatus } -> + OtherStatus; + #{ } -> + no_info + end, + [Status] = beam_ssa_ss:merge_in_args([Arg], [Status0], SS), Type2Status = Type2Status0#{ Type => Status }, + ?DP("Returned ~p:~p:~p~n", [Arg, Status, Type]), ?DP("New status map: ~p~n", [Type2Status]), - Lbl2SS = Lbl2SS0#{ returns => Type2Status}, + Lbl2SS = Lbl2SS0#{ returns => Type2Status }, {Lbl2SS, []}; aa_terminator(#b_switch{fail=F,list=Ls}, _SS, Lbl2SS) -> {Lbl2SS,[F|[L || {_,L} <- Ls]]}. @@ -589,385 +597,73 @@ aa_add_block_entry_ss([], _, Lbl2SS) -> %% Merge two sharing states when traversing the execution graph %% reverse post order. -aa_merge_ss(BlockLbl, NewSS, Lbl2SS) - when is_map_key(BlockLbl, Lbl2SS) -> - #{BlockLbl:=OrigSS} = Lbl2SS, - NewSize = maps:size(NewSS), - OrigSize = maps:size(OrigSS), - _ = ?aa_assert_ss(OrigSS), - _ = ?aa_assert_ss(NewSS), - - %% Always merge the smaller state into the larger. - Tmp = if NewSize < OrigSize -> - ?TM_DP("merging block ~p~n~p.~n~p.~n", - [BlockLbl, OrigSS, NewSS]), - aa_merge_continue(OrigSS, NewSS, maps:keys(NewSS), [], []); - true -> - ?TM_DP("merging block ~p~n~p.~n~p.~n", - [BlockLbl, NewSS, OrigSS]), - aa_merge_continue(NewSS, OrigSS, maps:keys(OrigSS), [], []) - end, - Lbl2SS#{BlockLbl=>Tmp}; +aa_merge_ss(BlockLbl, NewSS, Lbl2SS) when is_map_key(BlockLbl, Lbl2SS) -> + Lbl2SS#{BlockLbl=>beam_ssa_ss:merge(NewSS, map_get(BlockLbl, Lbl2SS))}; aa_merge_ss(BlockLbl, NewSS, Lbl2SS) -> Lbl2SS#{BlockLbl=>NewSS}. 
-aa_merge_continue(A, B, [V|Vars], ParentFixups, AliasFixups) -> - #{V:=BVas} = B, - case A of - #{V:=AVas} -> - ?TM_DP("merge ~p~n", [V]), - aa_merge_1(V, AVas, BVas, A, B, Vars, ParentFixups, AliasFixups); - #{} -> - ?TM_DP("not in dest ~p~n", [V]), - %% V isn't in A, nothing to merge, add it. - aa_merge_continue(A#{V=>BVas}, B, Vars, ParentFixups, AliasFixups) - end; -aa_merge_continue(A0, _, [], ParentFixups, AliasFixups) -> - A = aa_merge_parent_fixups(A0, ParentFixups), - ?aa_assert_ss(aa_merge_alias_fixups(A, AliasFixups)). - -aa_merge_1(_V, Vas, Vas, A, B, Vars, ParentFixups, AliasFixups) -> - %% They are both the same, no change. - ?TM_DP("same~n"), - aa_merge_continue(A, B, Vars, ParentFixups, AliasFixups); -aa_merge_1(_V, #vas{status=aliased}, BVas, A, B, Vars, - ParentFixups, AliasFixups) -> - %% V is aliased in A, anything related to B becomes aliased. - ?TM_DP("force aliasB of ~p~n", [aa_related(BVas)]), - aa_merge_continue(A, B, Vars, ParentFixups, - aa_related(BVas)++AliasFixups); -aa_merge_1(V, AVas, #vas{status=aliased}, A, B, Vars, - ParentFixups, AliasFixups) -> - %% V is aliased in B, anything related to A becomes aliased. - ?TM_DP("force aliasA of ~p~n", [aa_related(AVas)]), - aa_merge_continue(A#{V=>#vas{status=aliased}}, B, Vars, - ParentFixups, - aa_related(AVas)++AliasFixups); -aa_merge_1(V, #vas{status=S}=AVas, #vas{status=S}=BVas, A, B, Vars, - ParentFixups, AliasFixups) - when S == unique ; S == as_parent -> - aa_merge_child(V, AVas, BVas, A, B, Vars, ParentFixups, AliasFixups). - -aa_merge_child(V, #vas{child=Child}=AVas, #vas{child=Child}=BVas, - A, B, Vars, ParentFixups, AliasFixups) -> - ?TM_DP("child ~p, same~n", [Child]), - aa_merge_tuple(V, AVas, BVas, A, B, Vars, ParentFixups, AliasFixups); -aa_merge_child(V, #vas{child=none}=AVas, #vas{child=Child}=BVas, - A, B, Vars, ParentFixups, AliasFixups) -> - %% BVas has aquired a derivation from a Phi, no conflict, but the - %% A side has to be updated with new parent information. 
- ?TM_DP("new child in B, ~p~n", [Child]), - aa_merge_tuple(V, AVas#vas{child=Child}, BVas, A#{V=>BVas}, - B, Vars, [{Child,V}|ParentFixups], AliasFixups); -aa_merge_child(V, AVas, #vas{child=none}=BVas, A, B, Vars, - ParentFixups, AliasFixups) -> - %% AVas has aquired a derivation from a Phi, no conflict, no - %% update of the state necessary. - ?TM_DP("no child in B~n"), - aa_merge_tuple(V, AVas, BVas, A, B, Vars, ParentFixups, AliasFixups); -aa_merge_child(V, AVas, BVas, A, B, Vars, ParentFixups, AliasFixups) -> - %% Different children, this leads to aliasing. - ?TM_DP("different children, force alias of ~p~n", - [aa_related(AVas)++aa_related(BVas)]), - aa_merge_continue( - A#{V=>#vas{status=aliased}}, B, Vars, - ParentFixups, - aa_related(AVas)++aa_related(BVas)++AliasFixups). - -aa_merge_tuple(V, #vas{tuple_elems=Es}=AVas, #vas{tuple_elems=Es}=BVas, - A, B, Vars, ParentFixups, AliasFixups) -> - %% The same tuple elements are extracted, no conflict. - ?TM_DP("same tuple elements~n"), - aa_merge_pair(V, AVas, BVas, A, B, Vars, ParentFixups, AliasFixups); -aa_merge_tuple(V, #vas{tuple_elems=AEs}=AVas, #vas{tuple_elems=BEs}=BVas, - A, B, Vars, ParentFixups, AliasFixups) -> - %% This won't lead to aliasing if all elements are unique. - case aa_non_aliasing_tuple_elements(AEs++BEs) of - true -> - %% No aliasing, the elements are unique - ?TM_DP("different tuple elements, no aliasing~n"), - Elements = ordsets:union(AEs, BEs), - Vas = AVas#vas{tuple_elems=Elements}, - aa_merge_pair(V, Vas, BVas, A#{V=>Vas}, B, Vars, - ParentFixups, AliasFixups); - false -> - %% Aliasing occurred. - ?TM_DP("aliasing tuple elements, force ~p~n", - aa_related(AVas)++aa_related(BVas)), - aa_merge_continue(A#{V=>#vas{status=aliased}}, B, Vars, - ParentFixups, - aa_related(AVas)++aa_related(BVas)++AliasFixups) - end. - -aa_merge_pair(V, #vas{pair_elems=Es}=AVas, #vas{pair_elems=Es}=BVas, - A, B, Vars, ParentFixups, AliasFixups) -> - %% The same pair elements are extracted, no conflict. 
- ?TM_DP("same pairs~n"), - aa_merge_extracted(V, AVas, BVas, A, B, Vars, ParentFixups, AliasFixups); -aa_merge_pair(V, #vas{pair_elems=AEs}=AVas, #vas{pair_elems=BEs}=BVas, - A, B, Vars, ParentFixups, AliasFixups) -> - R = case {AEs,BEs} of - {{hd,H},{tl,T}} -> - {both,H,T}; - {{tl,T},{hd,H}} -> - {both,H,T}; - {E,none} -> - E; - {none,E} -> - E; - _ -> - alias - end, - case R of - alias -> - ?TM_DP("aliasing pair elements: ~p~n", [R]), - aa_merge_continue(A#{V=>#vas{status=aliased}}, B, Vars, - ParentFixups, - aa_related(AVas)++aa_related(BVas)++AliasFixups); - Pair -> - ?TM_DP("different pair elements, no aliasing~n"), - Vas = AVas#vas{pair_elems=Pair}, - aa_merge_extracted(V, Vas, BVas, A#{V=>Vas}, - B, Vars, ParentFixups, AliasFixups) - end. - -aa_merge_extracted(V, #vas{extracted=AEs}=AVas, #vas{extracted=BEs}, - A, B, Vars, ParentFixups, AliasFixups) -> - Extracted = ordsets:union(AEs, BEs), - aa_merge_continue(A#{V=>AVas#vas{extracted=Extracted}}, B, Vars, - ParentFixups, AliasFixups). - -aa_related(#vas{parents=Ps,child=Child,extracted=Ex}) -> - case Child of none -> - []; - Child -> - [Child] - end ++ Ps ++ Ex. - -aa_non_aliasing_tuple_elements(Elems) -> - aa_non_aliasing_tuple_elements(Elems, #{}). - -aa_non_aliasing_tuple_elements([{I,V}|Es], Seen) -> - case Seen of - #{I:=X} when X =/= V -> - false; - #{} -> - aa_non_aliasing_tuple_elements(Es, Seen#{I=>V}) - end; -aa_non_aliasing_tuple_elements([], _) -> - true. - -aa_merge_alias_fixups(SS, Fixups) -> - ?TM_DP("fixup: Forcing aliasing ~p~n", [Fixups]), - aa_set_status_1(Fixups, none, SS). - -aa_merge_parent_fixups(SS0, [{Child,Parent}|Fixups]) -> - ?TM_DP("fixup: Forcing parents ~p->~p~n", [Child,Parent]), - #{Child:=#vas{parents=Parents}=Vas} = SS0, - SS = SS0#{Child=>Vas#vas{parents=ordsets:add_element(Parent, Parents)}}, - aa_merge_parent_fixups(SS, Fixups); -aa_merge_parent_fixups(SS, []) -> - ?TM_DP("Parent fixups executed~n"), - SS. 
- %% Merge two sharing states when traversing the execution graph post %% order. The only thing the successor merging needs to to is to check %% if variables in the original SS have become aliased. aa_merge_ss_successor(BlockLbl, NewSS, Lbl2SS) -> #{BlockLbl:=OrigSS} = Lbl2SS, - Lbl2SS#{BlockLbl=>aa_merge_ss_successor(OrigSS, NewSS)}. - -aa_merge_ss_successor(Orig, New) -> - maps:fold(fun(V, Vas, Acc) -> - case New of - #{V:=Vas} -> - %% Nothing has changed for V. - Acc; - #{V:=#vas{status=aliased}} -> - aa_set_aliased(V, Acc); - #{} -> - %% V did not exist in New. - Acc - end - end, Orig, Orig). - -%% Add a new ssa variable to the sharing state and set its status. -aa_new_ssa_var(Var, Status, State) -> - ?ASSERT(false = maps:get(Var, State, false)), - State#{Var=>#vas{status=Status}}. + Lbl2SS#{BlockLbl=>beam_ssa_ss:merge(OrigSS, NewSS)}. aa_get_status(V=#b_var{}, State) -> - case State of - #{V:=#vas{status=as_parent,parents=Ps}} -> - aa_get_status(Ps, State); - #{V:=#vas{status=Status}} -> - Status - end; + beam_ssa_ss:get_status(V, State); aa_get_status(#b_literal{}, _State) -> - unique; -aa_get_status([V=#b_var{}], State) -> - aa_get_status(V, State); -aa_get_status([V=#b_var{}|Parents], State) -> - aa_meet(aa_get_status(V, State), aa_get_status(Parents, State)). + unique. +aa_get_status(V, State, Types) -> + case aa_is_plain_value(V, Types) of + true -> + unique; + false -> + aa_get_status(V, State) + end. %% aa_get_status but for instructions extracting values from pairs and %% tuples. aa_get_element_extraction_status(V=#b_var{}, State) -> - case State of - #{V:=#vas{status=aliased}} -> - aliased; - #{V:=#vas{tuple_elems=Elems}} when Elems =/= [] -> - unique; - #{V:=#vas{pair_elems=Elems}} when Elems =/= none -> - unique - end; + aa_get_status(V, State); aa_get_element_extraction_status(#b_literal{}, _State) -> unique. 
-aa_set_status(V=#b_var{}, aliased, State) -> - ?DP("Setting ~p to aliased.~n", [V]), - case State of - #{V:=#vas{status=unique,parents=[]}} -> - %% This is the initial value. - aa_set_status_1(V, none, State); - #{V:=#vas{status=aliased}} -> - %% No change - State; - #{V:=#vas{parents=Parents}} -> - %% V is derived from another value, so the status has to - %% be propagated to the parent(s). - aa_set_status(Parents, aliased, State) - end; -aa_set_status(_V=#b_var{}, unique, State) -> - ?ASSERT(true = case State of - #{_V:=#vas{status=unique}} -> true; - #{_V:=#vas{parents=Parents}} -> - [unique = aa_get_status(P, State) || P <- Parents], - true - end), - State; +aa_set_status(V=#b_var{}, Status, State) -> + ?DP("Setting ~p to ~p.~n", [V, Status]), + beam_ssa_ss:set_status(V, Status, State); aa_set_status(#b_literal{}, _Status, State) -> State; +aa_set_status(plain, _Status, State) -> + State; aa_set_status([X|T], Status, State) -> aa_set_status(X, Status, aa_set_status(T, Status, State)); aa_set_status([], _, State) -> State. -%% Propagate the aliased status to the children. -aa_set_status_1(#b_var{}=V, Parent, State0) -> - ?DP("aa_set_status_1: ~p, parent:~p~n~p.~n", [V,Parent,State0]), - #{V:=#vas{child=Child,extracted=Extracted,parents=Parents}} = State0, - State = State0#{V=>#vas{status=aliased}}, - Work = case Child of - none -> - []; - _ -> - [Child] - end ++ ordsets:del_element(Parent, Parents) ++ Extracted, - aa_set_status_1(Work, V, State); -aa_set_status_1([#b_var{}=V|Rest], Parent, State) -> - aa_set_status_1(Rest, Parent, aa_set_status_1(V, Parent, State)); -aa_set_status_1([], _Parent, State) -> - State. +aa_derive_from(Dst, Parents, State0) -> + aa_derive_from(Dst, Parents, #{}, State0). 
-aa_derive_from(Dst, [Parent|Parents], State0) -> - aa_derive_from(Dst, Parents, aa_derive_from(Dst, Parent, State0)); -aa_derive_from(_Dst, [], State0) -> +aa_derive_from(Dst, [Parent|Parents], Types, State0) -> + aa_derive_from(Dst, Parents, Types, + aa_derive_from1(Dst, Parent, Types, State0)); +aa_derive_from(_Dst, [], _, State0) -> State0; -aa_derive_from(#b_var{}, #b_literal{}, State) -> - State; -aa_derive_from(#b_var{}=Dst, #b_var{}=Parent, State) -> - ?DP("Deriving ~p from ~p~n~p.~n", [Dst,Parent,State]), - case State of - #{Dst:=#vas{status=aliased}} -> - %% Nothing to do, already aliased. This can happen when - %% handling Phis, no propagation to the parent should be - %% done. - ?aa_assert_ss(State); - #{Parent:=#vas{status=aliased}} -> - %% The parent is aliased, the child will become aliased. - ?aa_assert_ss(aa_set_aliased(Dst, State)); - #{Parent:=#vas{child=Child}} when Child =/= none -> - %% There already is a child, this will alias both Dst and Parent. - ?aa_assert_ss(aa_set_aliased([Dst,Parent], State)); - #{Parent:=#vas{child=none,tuple_elems=Elems}} when Elems =/= [] -> - %% There already is a child, this will alias both Dst and Parent. - ?aa_assert_ss(aa_set_aliased([Dst,Parent], State)); - #{Parent:=#vas{child=none,pair_elems=Elems}} when Elems =/= none -> - %% There already is a child, this will alias both Dst and Parent. - ?aa_assert_ss(aa_set_aliased([Dst,Parent], State)); - #{Dst:=#vas{parents=Parents}=ChildVas0, - Parent:=#vas{child=none}=ParentVas0} -> - %% Inherit the status of the parent. - ChildVas = - ChildVas0#vas{parents=ordsets:add_element(Parent, Parents), - status=as_parent}, - ParentVas = ParentVas0#vas{child=Dst}, - ?aa_assert_ss(State#{Dst=>ChildVas,Parent=>ParentVas}) - end. +aa_derive_from(Dst, Parent, Types, State0) -> + aa_derive_from1(Dst, Parent, Types, State0). -aa_prune_ss(SS, Live) -> - aa_prune_ss(SS, sets:to_list(Live), Live, #{}). 
-aa_prune_ss(SS, [V|Wanted], Live, Pruned) -> - case is_map_key(V, Pruned) of - false -> - %% This variable has to be kept, copy it, add it to the - %% set of live nodes and add the parents to the work list. - #{V:=#vas{parents=Ps}=Vas} = SS, - aa_prune_ss(SS, Ps++Wanted, - sets:add_element(V, Live), - Pruned#{V=>Vas}); - true -> - %% This variable is alread added. - aa_prune_ss(SS, Wanted, Live, Pruned) - end; -aa_prune_ss(_SS, [], Live, Pruned) -> - %% Now strip all references to variables not in the live set. - PruneRefs = fun(#vas{parents=Ps0,child=Child0,extracted=Es0, - tuple_elems=Ts0,pair_elems=Pes0}=Vas) -> - Ps = [P || P <- Ps0, sets:is_element(P, Live)], - Child = case sets:is_element(Child0, Live) of - true -> - Child0; - false -> - none - end, - Es = [E || E <- Es0, sets:is_element(E, Live)], - Ts = [E - || {_,Var}=E <- Ts0, sets:is_element(Var, Live)], - Pes = case Pes0 of - {_,X}=P -> - case sets:is_element(X, Live) of - true -> - P; - _ -> - none - end; - {both,X,Y}=P -> - case {sets:is_element(X, Live), - sets:is_element(Y, Live)} of - {true,true} -> - P; - {true,false} -> - {hd,X}; - {false,true} -> - {tl,Y}; - _ -> - none - end; - none -> - none - end, - Vas#vas{parents=Ps,child=Child,extracted=Es, - tuple_elems=Ts,pair_elems=Pes} - end, - #{V=>PruneRefs(Vas) || V:=Vas <- Pruned}. +aa_derive_from1(#b_var{}, #b_literal{}, _, State) -> + State; +aa_derive_from1(Dst, Parent, Types, State) -> + false = aa_is_plain_value(Parent, Types), %% Assertion + beam_ssa_ss:derive_from(#b_var{}=Dst, #b_var{}=Parent, State). 
aa_update_annotations(Funs, #aas{alias_map=AliasMap0,st_map=StMap0}=AAS) -> foldl(fun(F, {StMapAcc,AliasMapAcc}) -> #{F:=Lbl2SS0} = AliasMapAcc, #{F:=OptSt0} = StMapAcc, + #b_local{name=#b_literal{val=_N},arity=_A} = F, + ?DP("Updating annotations for ~p/~p~n", [_N,_A]), {OptSt,Lbl2SS} = aa_update_fun_annotation(OptSt0, Lbl2SS0, AAS#aas{caller=F}), @@ -986,6 +682,7 @@ aa_update_annotation_blocks([{?EXCEPTION_BLOCK,_}=Block|Blocks], aa_update_annotation_blocks(Blocks, [Block|Acc], Lbl2SS, AAS); aa_update_annotation_blocks([{Lbl, Block0}|Blocks], Acc, Lbl2SS0, AAS) -> Successors = beam_ssa:successors(Block0), + ?DP("Block ~p, successors: ~p.~n", [Lbl, Successors]), Lbl2SS = foldl(fun(?EXCEPTION_BLOCK, Lbl2SSAcc) -> %% What happens in the exception block %% can't influence anything in any of the @@ -997,6 +694,7 @@ aa_update_annotation_blocks([{Lbl, Block0}|Blocks], Acc, Lbl2SS0, AAS) -> end, Lbl2SS0, Successors), #{Lbl:=SS} = Lbl2SS, Block = aa_update_annotation_block(Block0, SS, AAS), + ?DP("Block ~p done.~n", [Lbl]), aa_update_annotation_blocks(Blocks, [{Lbl,Block}|Acc], Lbl2SS, AAS); aa_update_annotation_blocks([], Acc, Lbl2SS, _AAS) -> {Acc,Lbl2SS}. 
@@ -1026,8 +724,12 @@ aa_update_annotation(I=#b_set{args=[Pair],op={bif,hd}}, SS, AAS) -> aa_update_annotation(I=#b_set{args=[Pair],op={bif,tl}}, SS, AAS) -> Args = [{Pair,aa_get_element_extraction_status(Pair, SS)}], aa_update_annotation1(Args, I, AAS); -aa_update_annotation(I=#b_set{args=Args0}, SS, AAS) -> - Args = [{V,aa_get_status(V, SS)} || #b_var{}=V <- Args0], +aa_update_annotation(I=#b_set{args=Args0,anno=Anno,dst=_Dst}, SS, AAS) -> + Types = maps:get(arg_types, Anno, #{}), + Arg2Type = #{V=>maps:get(Idx, Types, any) + || {Idx,#b_var{}=V} <- lists:enumerate(0, 1, Args0)}, + Args = [{V,aa_get_status(V, SS, Arg2Type)} || #b_var{}=V <- Args0], + ?DP("Args with status for ~p: ~p~n", [_Dst, Args]), aa_update_annotation1(Args, I, AAS); aa_update_annotation(I=#b_ret{arg=#b_var{}=V}, SS, AAS) -> aa_update_annotation1(aa_get_status(V, SS), I, AAS); @@ -1037,116 +739,141 @@ aa_update_annotation(I, _SS, _AAS) -> aa_update_annotation1(ArgsStatus, I=#b_set{anno=Anno0,args=Args,op=Op}, AAS) -> - {Aliased,Unique} = - foldl(fun({#b_var{}=V,aliased}, {As,Us}) -> - {ordsets:add_element(V, As), Us}; - ({#b_var{}=V,unique}, {As,Us}) -> - {As, ordsets:add_element(V, Us)}; - (_, S) -> - S - end, {ordsets:new(),ordsets:new()}, ArgsStatus), - Anno1 = case Aliased of - [] -> maps:remove(aliased, Anno0); - _ -> Anno0#{aliased => Aliased} - end, - Anno2 = case Unique of - [] -> maps:remove(unique, Anno1); - _ -> Anno1#{unique => Unique} - end, + Anno1 = foldl(fun({#b_var{}=V,S}, Acc) -> + aa_update_annotation_for_var(V, S, Acc); + (_, Acc) -> + Acc + end, Anno0, ArgsStatus), + %% Alias analysis indicate the alias status of the instruction + %% arguments before the instruction is executed. For transforms in + %% later stages, we need to know if a particular argument dies + %% with this instruction or not. 
As we have the kill map available + %% during this analysis pass, it is more efficient to add an + %% annotation now, instead of trying to reconstruct the + %% kill map during the later transform pass. Anno = case {Op,Args} of {bs_create_bin,[#b_literal{val=append},_,Var|_]} -> - %% Alias analysis indicate the alias status of the - %% instruction arguments before the instruction is - %% executed. For the private-append optimization we - %% need to know if the first fragment dies with - %% this instruction or not. Adding an annotation - %% here, during alias analysis, is more efficient - %% than trying to reconstruct information in the - %% kill map during the private-append pass. - #aas{caller=Caller,kills=KillsMap} = AAS, - #b_set{dst=Dst} = I, - KillMap = maps:get(Caller, KillsMap), - Dies = sets:is_element(Var, map_get(Dst, KillMap)), - Anno2#{first_fragment_dies => Dies}; + %% For the private-append optimization we need to + %% know if the first fragment dies. + Anno1#{first_fragment_dies => dies_at(Var, I, AAS)}; + {update_record,[_Hint,_Size,Src|_Updates]} -> + %% One of the requirements for valid destructive + %% record updates is that the source tuple dies + %% with the update. + Anno1#{source_dies => dies_at(Src, I, AAS)}; _ -> - Anno2 + Anno1 end, I#b_set{anno=Anno}; aa_update_annotation1(Status, I=#b_ret{arg=#b_var{}=V,anno=Anno0}, _AAS) -> - Anno = case Status of - aliased -> - maps:remove(unique, Anno0#{aliased=>[V]}); - unique -> - maps:remove(aliased, Anno0#{unique=>[V]}) - end, + Anno = aa_update_annotation_for_var(V, Status, Anno0), I#b_ret{anno=Anno}. 
+aa_update_annotation_for_var(Var, Status, Anno0) -> + Aliased0 = maps:get(aliased, Anno0, []), + Unique0 = maps:get(unique, Anno0, []), + {Aliased, Unique} = case Status of + aliased -> + {ordsets:add_element(Var, Aliased0), + ordsets:del_element(Var, Unique0)}; + unique -> + {ordsets:del_element(Var, Aliased0), + ordsets:add_element(Var, Unique0)} + end, + Anno1 = case Aliased of + [] -> + maps:remove(aliased, Anno0); + _ -> + Anno0#{aliased=>Aliased} + end, + case Unique of + [] -> + maps:remove(unique, Anno1); + _ -> + Anno1#{unique=>Unique} + end. + +%% Return true if Var dies with its use (assumed, not checked) in the +%% instruction. +dies_at(Var, #b_set{dst=Dst}, AAS) -> + #aas{caller=Caller,kills=KillsMap} = AAS, + KillMap = map_get(Caller, KillsMap), + sets:is_element(Var, map_get(Dst, KillMap)). + aa_set_aliased(Args, SS) -> aa_set_status(Args, aliased, SS). aa_alias_all(SS) -> - aa_set_aliased(maps:keys(SS), SS). - -aa_register_extracted(Extracted, Aggregate, State) -> - ?DP("REGISTER ~p: ~p~n", [Aggregate,Extracted]), - #{Aggregate:=#vas{extracted=ExVars}=AggVas0, - Extracted:=#vas{parents=Parents}=ExVas0} = State, - AggVas = AggVas0#vas{extracted=ordsets:add_element(Extracted, ExVars)}, - ExVas = ExVas0#vas{status=as_parent, - parents=ordsets:add_element(Aggregate, Parents)}, - State#{Aggregate=>AggVas, Extracted=>ExVas}. - -aa_meet(#b_var{}=Var, OtherStatus, State) -> - Status = aa_get_status(Var, State), - aa_set_status(Var, aa_meet(OtherStatus, Status), State); -aa_meet(#b_literal{}, _SetStatus, State) -> - State; -aa_meet([Var|Vars], [Status|Statuses], State) -> - aa_meet(Vars, Statuses, aa_meet(Var, Status, State)); -aa_meet([], [], State) -> - State. - -aa_meet(StatusA, StatusB) -> - case {StatusA, StatusB} of - {_,aliased} -> aliased; - {aliased, _} -> aliased; - {unique, unique} -> unique - end. - -aa_meet([H|T]) -> - aa_meet(H, aa_meet(T)); -aa_meet([]) -> - unique. + aa_set_aliased(beam_ssa_ss:variables(SS), SS). 
%% %% Type is always less specific or exactly the same as one of the %% types in StatusByType, so we need to meet all possible statuses for %% the call site. %% +aa_get_status_by_type(none, _StatusByType) -> + %% The function did not return, conservatively report the status + %% as aliased. + aliased; aa_get_status_by_type(Type, StatusByType) -> Statuses = [Status || Candidate := Status <- StatusByType, beam_types:meet(Type, Candidate) =/= none], - aa_meet(Statuses). + case Statuses of + [] -> + %% No matching type was found, this can happen when the + %% returned type, for example, is a #t_union{}. For now, + %% conservatively return a status of aliased. + aliased; + _ -> + beam_ssa_ss:meet_in_args(Statuses) + end. -%% Predicate to check if all variables in `Vars` dies at `Where`. --spec aa_all_dies([#b_var{}], kill_loc(), #aas{}) -> boolean(). -aa_all_dies(Vars, Where, #aas{caller=Caller,kills=Kills}) -> +aa_alias_surviving_args(Args, Call, SS, Anno, AAS) -> + KillSet = aa_killset_for_instr(Call, AAS), + ArgTypes = maps:get(arg_types, Anno, #{}), + aa_alias_surviving_args1(Args, 0, SS, ArgTypes, KillSet). + +aa_alias_surviving_args1([A|Args], Idx, SS0, ArgTypes, KillSet) -> + SS = case sets:is_element(A, KillSet) of + true -> + SS0; + false -> + aa_set_status(A, aliased, SS0) + end, + aa_alias_surviving_args1(Args, Idx+1, SS, ArgTypes, KillSet); +aa_alias_surviving_args1([], _Idx, SS, _ArgTypes, _KillSet) -> + SS. + +%% Return the kill-set for the instruction defining Dst. +aa_killset_for_instr(Dst, #aas{caller=Caller,kills=Kills}) -> KillMap = map_get(Caller, Kills), - KillSet = map_get(Where, KillMap), - aa_all_dies(Vars, KillSet). + map_get(Dst, KillMap). -aa_all_dies([#b_literal{}|Vars], KillSet) -> - aa_all_dies(Vars, KillSet); -aa_all_dies([#b_var{}=V|Vars], KillSet) -> - case sets:is_element(V, KillSet) of +%% Predicate to check if all variables in `Vars` dies at `Where`. +-spec aa_all_dies([#b_var{}], kill_loc(), type_db(), #aas{}) -> boolean(). 
+aa_all_dies(Vars, Where, Types, AAS) -> + KillSet = aa_killset_for_instr(Where, AAS), + aa_all_dies1(Vars, Types, KillSet). + +%% As aa_all_dies/4 but without type information. +aa_all_dies(Vars, Where, AAS) -> + aa_all_dies(Vars, Where, #{}, AAS). + +aa_all_dies1([#b_literal{}|Vars], Types, KillSet) -> + aa_all_dies1(Vars, Types, KillSet); +aa_all_dies1([#b_var{}=V|Vars], Types, KillSet) -> + case aa_dies(V, Types, KillSet) of true -> - aa_all_dies(Vars, KillSet); + aa_all_dies1(Vars, Types, KillSet); false -> false end; -aa_all_dies([], _) -> +aa_all_dies1([], _, _) -> true. +aa_dies(V, Types, KillSet) -> + sets:is_element(V, KillSet) orelse aa_is_plain_value(V, Types). + aa_alias_if_args_dont_die(Args, Where, SS, AAS) -> case aa_all_dies(Args, Where, AAS) of true -> @@ -1162,40 +889,146 @@ aa_alias_inherit_and_alias_if_arg_does_not_die(Dst, Arg, SS0, AAS) -> aa_set_status(Dst, aa_get_status(Arg, SS1), SS1). %% Check that a variable in Args only occurs once and that it is not -%% aliased, literals are ignored. -aa_all_vars_unique(Args, SS) -> - aa_all_vars_unique(Args, #{}, SS). - -aa_all_vars_unique([#b_literal{}|Args], Seen,SS) -> - aa_all_vars_unique(Args, Seen, SS); -aa_all_vars_unique([#b_var{}=V|Args], Seen, SS) -> +%% aliased, literals, values of types which fit into a register are +%% ignored. +aa_all_vars_unique(Args, Types, SS) -> + aa_all_vars_unique(Args, #{}, Types, SS). + +aa_all_vars_unique([#b_literal{}|Args], Seen, Types, SS) -> + aa_all_vars_unique(Args, Seen, Types, SS); +aa_all_vars_unique([#b_var{}=V|Args], Seen, Types, SS) -> aa_get_status(V, SS) =:= unique andalso case Seen of #{ V := _ } -> false; #{} -> - aa_all_vars_unique(Args, Seen#{V => true }, SS) + aa_all_vars_unique(Args, Seen#{V => true }, Types, SS) end; -aa_all_vars_unique([], _, _) -> +aa_all_vars_unique([], _, _, _) -> true. +%% Predicate to test whether a variable is of a type which is just a +%% value or behaves as it was (for example pid, ports and references). 
+aa_is_plain_value(V, Types) -> + case Types of + #{V:=Type} -> + aa_is_plain_type(Type); + #{} -> + false + end. + +aa_is_plain_type(Type) -> + case Type of + #t_atom{} -> + true; + #t_number{} -> + true; + #t_integer{} -> + true; + #t_float{} -> + true; + 'identifier' -> + true; + 'pid' -> + true; + 'port' -> + true; + 'reference' -> + true; + _ -> + false + end. + +aa_map_arg_to_type(Args, Types) -> + aa_map_arg_to_type(Args, Types, #{}, 0). + +aa_map_arg_to_type([A|Args], Types, Acc0, Idx) -> + Acc = case Types of + #{Idx:=T} -> + Acc0#{A=>T}; + #{} -> + Acc0 + end, + aa_map_arg_to_type(Args, Types, Acc, Idx+1); +aa_map_arg_to_type([], _, Acc, _) -> + Acc. + aa_construct_term(Dst, Values, SS, AAS) -> - case aa_all_vars_unique(Values, SS) - andalso aa_all_dies(Values, Dst, AAS) of + aa_construct_term(Dst, Values, #{}, SS, AAS). + +aa_construct_term(Dst, Values, Types, SS, AAS) -> + ?DP("Constructing term in ~p~n values: ~p~n types: ~p~n au: ~p, ad: ~p~n", + [Dst, Values, Types, aa_all_vars_unique(Values, Types, SS), + aa_all_dies(Values, Dst, Types, AAS)]), + case aa_all_vars_unique(Values, Types, SS) + andalso aa_all_dies(Values, Dst, Types, AAS) of true -> - aa_derive_from(Dst, Values, SS); + ?DP(" deriving ~p from ~p~n", [Dst, Values]), + aa_derive_from(Dst, Values, Types, SS); false -> - aa_set_aliased([Dst|Values], SS) + Alias = [V || V <- [Dst|Values], not aa_is_plain_value(V, Types)], + ?DP(" aliasing ~p~n", [Alias]), + aa_set_aliased(Alias, SS) end. -aa_update_record_get_vars([#b_literal{}, Value|Updates]) -> - [Value|aa_update_record_get_vars(Updates)]; +aa_construct_tuple(Dst, IdxValues, Types, SS, AAS) -> + KillSet = aa_killset_for_instr(Dst, AAS), + ?DP("Constructing tuple in ~p~n from: ~p~n", + [Dst, [#{idx=>Idx,v=>V,status=>aa_get_status(V, SS, Types), + killed=>aa_dies(V, Types, KillSet), + plain=>aa_is_plain_value(V, Types)} + || {Idx,V} <- IdxValues]]), + ?DP("~p~n", [SS]), + aa_build_tuple_or_pair(Dst, IdxValues, Types, KillSet, SS, []). 
+ +aa_build_tuple_or_pair(Dst, [{Idx,#b_literal{val=Lit}}|IdxValues], Types, + KillSet, SS0, Sources) + when is_atom(Lit); is_number(Lit); is_map(Lit); + is_bitstring(Lit); is_function(Lit); Lit =:= [] -> + aa_build_tuple_or_pair(Dst, IdxValues, Types, KillSet, + SS0, [{Idx,plain}|Sources]); +aa_build_tuple_or_pair(Dst, [{Idx,V}=IdxVar|IdxValues], Types, + KillSet, SS0, Sources) -> + case aa_is_plain_value(V, Types) of + true -> + %% Does not need to be tracked. + aa_build_tuple_or_pair(Dst, IdxValues, Types, + KillSet, SS0, [{Idx,plain}|Sources]); + false -> + SS = case aa_dies(V, Types, KillSet) of + true -> + SS0; + false -> + aa_set_aliased(V, SS0) + end, + aa_build_tuple_or_pair(Dst, IdxValues, Types, + KillSet, SS, [IdxVar|Sources]) + end; +aa_build_tuple_or_pair(Dst, [], _Types, _KillSet, SS, Sources) -> + ?DP(" embedding ~p~n", [Sources]), + R = beam_ssa_ss:embed_in(Dst, Sources, SS), + R. + +aa_construct_pair(Dst, Args0, Types, SS, AAS) -> + KillSet = aa_killset_for_instr(Dst, AAS), + [Hd,Tl] = Args0, + ?DP("Constructing pair in ~p~n from ~p and ~p~n~p~n", [Dst,Hd,Tl,SS]), + Args = [{hd,Hd},{tl,Tl}], + aa_build_tuple_or_pair(Dst, Args, Types, KillSet, SS, []). + +aa_update_record_get_vars([#b_literal{val=I}, Value|Updates]) -> + [{I-1,Value}|aa_update_record_get_vars(Updates)]; aa_update_record_get_vars([]) -> []. aa_bif(Dst, element, [#b_literal{val=Idx},Tuple], SS, _AAS) when is_integer(Idx), Idx > 0 -> - aa_tuple_extraction(Dst, Tuple, #b_literal{val=Idx-1}, SS); + %% The element bif is always rewritten to a get_tuple_element + %% instruction when the index is an integer and the second + %% argument is a known to be a tuple. Therefore this code is only + %% reached when the type of is unknown, thus there is no point in + %% trying to provide aa_tuple_extraction/5 with type information. 
+ aa_tuple_extraction(Dst, Tuple, #b_literal{val=Idx-1}, #{}, SS); aa_bif(Dst, element, [#b_literal{},Tuple], SS, _AAS) -> %% This BIF will fail, but in order to avoid any later transforms %% making use of uniqueness, conservatively alias. @@ -1203,8 +1036,16 @@ aa_bif(Dst, element, [#b_literal{},Tuple], SS, _AAS) -> aa_bif(Dst, element, [#b_var{},Tuple], SS, _AAS) -> aa_set_aliased([Dst,Tuple], SS); aa_bif(Dst, hd, [Pair], SS, _AAS) -> + %% The hd bif is always rewritten to a get_hd instruction when the + %% argument is known to be a pair. Therefore this code is only + %% reached when the type of is unknown, thus there is no point in + %% trying to provide aa_pair_extraction/5 with type information. aa_pair_extraction(Dst, Pair, hd, SS); aa_bif(Dst, tl, [Pair], SS, _AAS) -> + %% The tl bif is always rewritten to a get_tl instruction when the + %% argument is known to be a pair. Therefore this code is only + %% reached when the type of is unknown, thus there is no point in + %% trying to provide aa_pair_extraction/5 with type information. aa_pair_extraction(Dst, Pair, tl, SS); aa_bif(Dst, map_get, [_Key,Map], SS, AAS) -> aa_map_extraction(Dst, Map, SS, AAS); @@ -1228,37 +1069,32 @@ aa_bif(Dst, Bif, Args, SS, _AAS) -> aa_set_aliased([Dst|Args], SS) end. -aa_phi(Dst, Args0, SS) -> +aa_phi(Dst, Args0, Anno, SS0, AAS) -> Args = [V || {V,_} <- Args0], + SS = aa_alias_surviving_args(Args, {phi,Dst}, SS0, Anno, AAS), aa_derive_from(Dst, Args, SS). 
aa_call(Dst, [#b_local{}=Callee|Args], Anno, SS0, - #aas{alias_map=AliasMap,st_map=StMap}=AAS0) -> + #aas{alias_map=AliasMap,st_map=StMap,cnt=Cnt0}=AAS0) -> #b_local{name=#b_literal{val=_N},arity=_A} = Callee, ?DP("A Call~n callee: ~p/~p~n args: ~p~n", [_N, _A, Args]), - case AliasMap of - #{Callee:=#{0:=CalleeSS}=Lbl2SS} -> + case is_map_key(Callee, AliasMap) of + true -> ?DP(" The callee is known~n"), - #opt_st{args=CalleeArgs} = map_get(Callee, StMap), - ?DP(" callee args: ~p~n", [CalleeArgs]), + #opt_st{args=_CalleeArgs} = map_get(Callee, StMap), + ?DP(" callee args: ~p~n", [_CalleeArgs]), ?DP(" caller args: ~p~n", [Args]), - ?DP(" args in caller: ~p~n", - [[{Arg, aa_get_status(Arg, SS0)} || Arg <- Args]]), - ArgStates = [ aa_get_status(Arg, CalleeSS) || Arg <- CalleeArgs], - ?DP(" callee arg states: ~p~n", [ArgStates]), - AAS = aa_add_call_info(Callee, Args, SS0, AAS0), - SS = aa_meet(Args, ArgStates, SS0), - ?DP(" meet: ~p~n", - [[{Arg, aa_get_status(Arg, SS)} || Arg <- Args]]), - ?DP(" callee-ss ~p~n", [CalleeSS]), + SS1 = aa_alias_surviving_args(Args, Dst, SS0, Anno, AAS0), + ?DP(" caller ss before call:~n ~p.~n", [SS1]), + #aas{alias_map=AliasMap} = AAS = + aa_add_call_info(Callee, Args, SS1, AAS0), + #{Callee:=#{0:=_CalleeSS}=Lbl2SS} = AliasMap, + ?DP(" callee ss: ~p~n", [_CalleeSS]), + ?DP(" caller ss after call: ~p~n", [SS1]), + ReturnStatusByType = maps:get(returns, Lbl2SS, #{}), ?DP(" status by type: ~p~n", [ReturnStatusByType]), - ReturnedType = case Anno of - #{ result_type := ResultType } -> - ResultType; - #{} -> - any - end, + ReturnedType = maps:get(result_type, Anno, any), %% ReturnedType is always less specific or exactly the %% same as one of the types in ReturnStatusByType. 
?DP(" returned type: ~s~n", @@ -1266,8 +1102,11 @@ aa_call(Dst, [#b_local{}=Callee|Args], Anno, SS0, ResultStatus = aa_get_status_by_type(ReturnedType, ReturnStatusByType), ?DP(" result status: ~p~n", [ResultStatus]), - {aa_set_status(Dst, ResultStatus, SS), AAS}; - #{} -> + {SS,Cnt} = + beam_ssa_ss:set_call_result(Dst, ResultStatus, SS1, Cnt0), + ?DP("~p~n", [SS]), + {SS, AAS#aas{cnt=Cnt}}; + false -> %% We don't know anything about the function, don't change %% the status of any variables {SS0, AAS0} @@ -1285,44 +1124,51 @@ aa_call(Dst, [_Callee|Args], _Anno, SS, AAS) -> %% Incorporate aliasing information for the arguments to a call when %% analysing the body of a function into the global state. -aa_add_call_info(Callee, Args, SS0, #aas{call_args=Info0}=AAS) -> - ArgStats = [aa_get_status(Arg, SS0) || Arg <- Args], - #{Callee := Stats} = Info0, - NewStats = [aa_meet(A, B) || {A,B} <- zip(Stats, ArgStats)], - Info = Info0#{Callee => NewStats}, - AAS#aas{call_args=Info}. - -aa_get_call_args_status(Args, Callee, #aas{call_args=Info}) -> - #{ Callee := Status } = Info, - zip(Args, Status). +aa_add_call_info(Callee, Args, SS0, + #aas{call_args=InInfo0,caller=_Caller}=AAS) -> + #{Callee := InStatus0} = InInfo0, + ?DBG(#b_local{name=#b_literal{val=_CN},arity=_CA} = _Caller), + ?DBG(#b_local{name=#b_literal{val=_N},arity=_A} = Callee), + ?DP("Adding call info for ~p/~p when called by ~p/~p~n" + " args: ~p.~n ss:~p.~n", [_N,_A,_CN,_CA,Args,SS0]), + InStatus = beam_ssa_ss:merge_in_args(Args, InStatus0, SS0), + ?DP(" orig in-info: ~p.~n", [InStatus0]), + ?DP(" updated in-info for ~p/~p:~n ~p.~n", [_N,_A,InStatus]), + InInfo = InInfo0#{Callee => InStatus}, + AAS#aas{call_args=InInfo}. + +aa_init_fun_ss(Args, FunId, #aas{call_args=Info,st_map=StMap}) -> + #{FunId:=ArgsStatus} = Info, + #{FunId:=#opt_st{cnt=Cnt}} = StMap, + ?DP("aa_init_fun_ss: ~p~n args: ~p~n status: ~p~n cnt: ~p~n", + [FunId,Args,ArgsStatus,Cnt]), + beam_ssa_ss:new(Args, ArgsStatus, Cnt). 
%% Pair extraction. -aa_pair_extraction(Dst, #b_var{}=Pair, Element, SS) -> - case SS of - #{Pair:=#vas{status=aliased}} -> - %% The pair is aliased, so what is extracted will be aliased. - aa_set_aliased(Dst, SS); - #{Pair:=#vas{pair_elems={both,_,_}}} -> - %% Both elements have already been extracted. - aa_set_aliased([Dst,Pair], SS); - #{Pair:=#vas{pair_elems=none}=Vas} -> - %% Nothing has been extracted from this pair yet. - aa_register_extracted( - Dst, Pair, - SS#{Pair=>Vas#vas{pair_elems={Element,Dst}}}); - #{Pair:=#vas{pair_elems={Element,_}}} -> - %% This element has already been extracted. - aa_set_aliased([Dst,Pair], SS); - #{Pair:=#vas{pair_elems={tl,T}}=Vas} when Element =:= hd -> - %% Both elements have now been extracted, but no aliasing. - aa_register_extracted(Dst, Pair, - SS#{Pair=>Vas#vas{pair_elems={both,Dst,T}}}); - #{Pair:=#vas{pair_elems={hd,H}}=Vas} when Element =:= tl -> - %% Both elements have now been extracted, but no aliasing. - aa_register_extracted(Dst, Pair, - SS#{Pair=>Vas#vas{pair_elems={both,H,Dst}}}) +aa_pair_extraction(Dst, Pair, Element, SS) -> + aa_pair_extraction(Dst, Pair, Element, any, SS). + +aa_pair_extraction(Dst, #b_var{}=Pair, Element, Type, SS) -> + IsPlainValue = case {Type,Element} of + {#t_cons{type=Ty},hd} -> + aa_is_plain_type(Ty); + {#t_cons{terminator=Ty},tl} -> + aa_is_plain_type(Ty); + _ -> + %% There is no type information, + %% conservatively assume this isn't a plain + %% value. + false + end, + case IsPlainValue of + true -> + %% A plain value was extracted, it doesn't change the + %% alias status of Dst nor the pair. + SS; + false -> + beam_ssa_ss:extract(Dst, Pair, Element, SS) end; -aa_pair_extraction(_Dst, #b_literal{}, _Element, SS) -> +aa_pair_extraction(_Dst, #b_literal{}, _Element, _, SS) -> SS. aa_map_extraction(Dst, Map, SS, AAS) -> @@ -1331,32 +1177,29 @@ aa_map_extraction(Dst, Map, SS, AAS) -> aa_alias_inherit_and_alias_if_arg_does_not_die(Dst, Map, SS, AAS)). 
%% Extracting elements from a tuple. -aa_tuple_extraction(Dst, #b_var{}=Tuple, #b_literal{val=I}, SS) -> - case SS of - #{Tuple:=#vas{status=aliased}} -> - %% The tuple is aliased, so what is extracted will be - %% aliased. - aa_set_aliased(Dst, SS); - #{Tuple:=#vas{child=Child}} when Child =/= none -> - %% Something has already been derived from the tuple. - aa_set_aliased([Dst,Tuple], SS); - #{Tuple:=#vas{tuple_elems=[]}=TupleVas} -> - %% Nothing has been extracted from this tuple yet. - aa_register_extracted( - Dst, Tuple, SS#{Tuple=>TupleVas#vas{tuple_elems=[{I,Dst}]}}); - #{Tuple:=#vas{tuple_elems=Elems0}=TupleVas} -> - case [ Idx || {Idx,_} <- Elems0, I =:= Idx] of - [] -> - %% This element has not been extracted. - Elems = ordsets:add_element({I,Dst}, Elems0), - aa_register_extracted( - Dst, Tuple, SS#{Tuple=>TupleVas#vas{tuple_elems=Elems}}); - _ -> - %% This element is already extracted -> aliasing - aa_set_aliased([Dst,Tuple], SS) - end +aa_tuple_extraction(Dst, #b_var{}=Tuple, #b_literal{val=I}, Types, SS) -> + TupleType = maps:get(0, Types, any), + TypeIdx = I+1, %% In types tuple indices starting at zero. + IsPlainValue = case TupleType of + #t_tuple{elements=#{TypeIdx:=T}} -> + aa_is_plain_type(T); + _ -> + %% There is no type information, + %% conservatively assume this isn't a plain + %% value. + false + end, + ?DP("tuple-extraction dst:~p, tuple: ~p, idx: ~p,~n" + " type: ~p,~n plain: ~p~n", + [Dst, Tuple, I, TupleType, IsPlainValue]), + if IsPlainValue -> + %% A plain value was extracted, it doesn't change the + %% alias status of Dst nor the tuple. + SS; + true -> + beam_ssa_ss:extract(Dst, Tuple, I, SS) end; -aa_tuple_extraction(_, #b_literal{}, _, SS) -> +aa_tuple_extraction(_, #b_literal{}, _, _, SS) -> SS. aa_make_fun(Dst, Callee=#b_local{name=#b_literal{}}, @@ -1417,141 +1260,119 @@ aa_breadth_first([], [], _Seen, _FuncDb) -> aa_breadth_first([], Next, Seen, FuncDb) -> aa_breadth_first(Next, [], Seen, FuncDb). --ifdef(EXTRA_ASSERTS). 
- --spec aa_assert_ss(sharing_state()) -> sharing_state(). - -aa_assert_ss(SS) -> - try - maps:foreach( - fun(_V, #vas{status=aliased}=Vas) -> - %% An aliased variable should not have extra info. - [] = Vas#vas.parents, - none = Vas#vas.child, - [] = Vas#vas.extracted, - [] = Vas#vas.tuple_elems, - none = Vas#vas.pair_elems, - ok; - (V, #vas{status=unique,child=Child,extracted=Es, - tuple_elems=Ts,pair_elems=Pair}=Vas) -> - [] = Vas#vas.parents, - aa_assert_extracted(Es, Ts, Pair, V), - aa_assert_parent_of(V, Child, SS), - aa_assert_parent_of(V, Es, SS), - aa_assert_pair(Pair, V, SS), - aa_assert_tuple_elems(Ts, V, SS); - (V, #vas{status=as_parent,parents=Ps,child=Child,extracted=Es, - tuple_elems=Ts,pair_elems=Pair}) -> - aa_assert_not_aliased( - Ps, SS, - io_lib:format("as parent of ~p should not be aliased.", - [V])), - aa_assert_extracted(Es, Ts, Pair, V), - aa_assert_parent_of(Ps, V, SS), - aa_assert_parent_of(V, Child, SS), - aa_assert_parent_of(V, Es, SS), - aa_assert_pair(Pair, V, SS), - aa_assert_tuple_elems(Ts, V, SS) - end, SS) - of - _ -> SS - catch {assertion_failure, V, Desc} -> - io:format("Malformed SS~n~p~n~p ~s~n", [SS, V, Desc]), - exit(assertion_failure) - end. 
- -%% Check that V is a parent of Child -aa_assert_parent_of(_V, none, _SS) -> - ok; -aa_assert_parent_of(#b_var{}=V, #b_var{}=Child, SS) -> - case SS of - #{Child:=#vas{status=as_parent,parents=Ps}} -> - case ordsets:is_element(V, Ps) of - true -> - ok; - false -> - throw({assertion_failure, V, - io_lib:format( - "child ~p does not have ~p as parent", - [Child, V])}) - end; - #{} -> - throw({assertion_failure, V, - io_lib:format( - "child ~p does not have status as_parent", [Child])}) - end; -aa_assert_parent_of(#b_var{}=V, [P|Ps], SS) -> - aa_assert_parent_of(V, P, SS), - aa_assert_parent_of(V, Ps, SS); -aa_assert_parent_of([V|Vs], Child, SS) -> - aa_assert_parent_of(V, Child, SS), - aa_assert_parent_of(Vs, Child, SS); -aa_assert_parent_of(_, [], _) -> - true; -aa_assert_parent_of([], _, _) -> - true. +expand_record_update(#opt_st{ssa=Linear0,cnt=First,anno=Anno0}=OptSt) -> + {Linear,Cnt} = eru_blocks(Linear0, First), + Anno = Anno0#{orig_cnt=>First}, + OptSt#opt_st{ssa=Linear,cnt=Cnt,anno=Anno}. + +eru_blocks(Linear, First) -> + eru_blocks(Linear, First, []). + +eru_blocks([{Lbl,#b_blk{is=Is0}=Blk}|Rest], First, Acc) -> + {Is,Next} = eru_is(Is0, First, []), + eru_blocks(Rest, Next, [{Lbl,Blk#b_blk{is=Is}}|Acc]); +eru_blocks([], Cnt, Acc) -> + {reverse(Acc),Cnt}. + +eru_is([#b_set{op=update_record, + args=[_Hint,#b_literal{val=Size},Src|Updates]=Args, + anno=Anno0}=I0|Rest], First, Acc) -> + ArgTypes0 = maps:get(arg_types, Anno0, #{}), + TupleType = maps:get(2, ArgTypes0, any), + {Extracts,ExtraArgs,Next,ArgTypes} = + eru_args(Updates, First, Src, Size, TupleType, ArgTypes0), + Anno = if map_size(ArgTypes) =:= 0 -> + Anno0; + true -> + Anno0#{arg_types=>ArgTypes} + end, + I = I0#b_set{args=Args++ExtraArgs,anno=Anno}, + eru_is(Rest, Next, [I|Extracts]++Acc); +eru_is([I|Rest], First, Acc) -> + eru_is(Rest, First, [I|Acc]); +eru_is([], First, Acc) -> + {reverse(Acc), First}. 
+ +eru_args(Updates, First, Src, Size, TupleType, ArgTypes) -> + eru_args1(Updates, sets:from_list(lists:seq(1, Size), [{version,2}]), + 4, First, Src, TupleType, ArgTypes). + +eru_args1([#b_literal{val=Idx},_Val|Updates], + Remaining, ArgIdx, First, Src, TupleType, ArgTypes) -> + eru_args1(Updates, sets:del_element(Idx, Remaining), ArgIdx+2, + First, Src, TupleType, ArgTypes); +eru_args1([], Remaining, ArgIdx, First, Src, TupleType, ArgTypes) -> + eru_args2(sets:to_list(Remaining), [], [], ArgIdx, + First, Src, TupleType, ArgTypes). + +eru_args2([Idx|Remaining], Extracts, Args0, ArgIdx, First, + Src, TupleType, ArgTypes0) -> + Dst = #b_var{name=First}, + I = #b_set{dst=Dst,op=get_tuple_element, + args=[Src,#b_literal{val=Idx-1}], + anno=#{arg_types=>#{0=>TupleType}}}, + ArgTypes = case TupleType of + #t_tuple{elements=#{Idx:=ET}} -> + ArgTypes0#{ArgIdx=>ET}; + _ -> + ArgTypes0 + end, + %% built in reverse to make argument indexes end up in the right + %% order after the final reverse. + Args = [Dst,#b_literal{val=Idx}|Args0], + eru_args2(Remaining, [I|Extracts], Args, + ArgIdx+2, First+1, Src, TupleType, ArgTypes); +eru_args2([], Extracts, Args, _, First, _, _, ArgTypes) -> + {Extracts,reverse(Args),First,ArgTypes}. + +restore_update_record(#opt_st{ssa=Linear,anno=Anno}=OptSt) -> + Limit = map_get(orig_cnt, Anno), + OptSt#opt_st{ssa=rur_blocks(Linear, Limit), + cnt=Limit,anno=maps:remove(orig_cnt, Anno)}. + +rur_blocks([{Lbl,#b_blk{is=Is}=Blk}|Rest], Limit) -> + [{Lbl,Blk#b_blk{is=rur_is(Is, Limit)}}|rur_blocks(Rest, Limit)]; +rur_blocks([], _) -> + []. 
-aa_assert_pair(none, _V, _SS) -> - ok; -aa_assert_pair({Elem,X}, V, SS) when Elem =:= hd; Elem =:= tl -> - case SS of - #{X:=#vas{status=as_parent}} -> - aa_assert_parent_of(V, X, SS); - #{} -> - throw({assertion_failure, V, - io_lib:format("extracted pair and ~p does not" - " have status as_parent", [X])}) - end; -aa_assert_pair({both,X,Y}, V, SS) -> - case SS of - #{X:=#vas{status=as_parent}, - Y:=#vas{status=as_parent}} -> - aa_assert_parent_of(V, X, SS), - aa_assert_parent_of(V, Y, SS); - #{} -> - throw({assertion_failure, V, - io_lib:format("extracted pairs ~p and ~p do not" - " have status as_parent", [X, Y])}) - end. +rur_is([#b_set{dst=#b_var{name=Name},op=get_tuple_element}|Rest], Limit) + when is_integer(Name), Name >= Limit -> + rur_is(Rest, Limit); +rur_is([#b_set{op=update_record, + args=[Hint,Size,Src|Updates], + anno=Anno0}=I0|Rest], Limit) -> + Anno = rur_filter_anno( + rur_filter_anno(Anno0, unique, Limit), + aliased, Limit), + Args = [Hint,Size,Src] ++ rur_args(Updates, Limit), + I = I0#b_set{args=Args,anno=Anno}, + [I|rur_is(Rest, Limit)]; +rur_is([I|Rest], Limit) -> + [I|rur_is(Rest, Limit)]; +rur_is([], _) -> + []. -aa_assert_tuple_elems([{_,X}|Ts], V, SS) -> - case SS of - #{X:=#vas{status=as_parent}} -> - aa_assert_parent_of(V, X, SS), - aa_assert_tuple_elems(Ts, V, SS); - #{} -> - throw({assertion_failure, V, - io_lib:format( - "child ~p does not have status as_parent", [X])}) - end; -aa_assert_tuple_elems([], _, _) -> - ok. - -aa_assert_extracted(Es, Ts, Pair, Var) -> - Actual = ordsets:union(ordsets:from_list([V || {_,V} <- Ts]), - ordsets:from_list(case Pair of - none -> []; - {_, X} -> [X]; - {both,X,Y} -> [X,Y] - end)), - case Es of - Actual -> - true; - _ -> - throw({assertion_failure, Var, - "has inconsistent extracted set"}) +rur_filter_anno(Anno, Key, Limit) -> + Vars = maps:get(Key, Anno, []), + case rur_filter_synthetic(Vars, Limit) of + [] -> + maps:remove(Key, Anno); + Vs -> + Anno#{Key=>Vs} end. 
-aa_assert_not_aliased([V|Vs], SS, Desc) -> - #{V:=#vas{status=S}} = SS, +rur_filter_synthetic([#b_var{name=N}|Rest], Limit) + when is_integer(N), N >= Limit -> + rur_filter_synthetic(Rest, Limit); +rur_filter_synthetic([V|Rest], Limit) -> + [V|rur_filter_synthetic(Rest, Limit)]; +rur_filter_synthetic([], _) -> + []. - case S of - unique -> ok; - as_parent -> ok; - _ -> - throw({assertion_failure, V, Desc}) - end, - aa_assert_not_aliased(Vs, SS, Desc); -aa_assert_not_aliased([], _SS, _) -> - true. --endif. +rur_args([_,#b_var{name=Name}|Updates], Limit) + when is_integer(Name), Name >= Limit -> + rur_args(Updates, Limit); +rur_args([Idx,V|Updates], Limit) -> + [Idx,V|rur_args(Updates, Limit)]; +rur_args([], _) -> + []. diff --git a/lib/compiler/src/beam_ssa_check.erl b/lib/compiler/src/beam_ssa_check.erl index fbdf6e72f396..29a1264daf6c 100644 --- a/lib/compiler/src/beam_ssa_check.erl +++ b/lib/compiler/src/beam_ssa_check.erl @@ -151,33 +151,32 @@ op_check([set,Result,{atom,_,Op}|PArgs], PAnno, #b_set{dst=Dst,args=AArgs,op=Op,anno=AAnno}=_I, Env0) -> ?DP("trying set ~p:~n res: ~p <-> ~p~n args: ~p <-> ~p~n i: ~p~n", [Op, Result, Dst, PArgs, AArgs, _I]), - Env = check_annos(PAnno, AAnno, Env0), - op_check_call(Op, Result, Dst, PArgs, AArgs, Env); + Env = op_check_call(Op, Result, Dst, PArgs, AArgs, Env0), + check_annos(PAnno, AAnno, Env); op_check([set,Result,{{atom,_,bif},{atom,_,Op}}|PArgs], PAnno, #b_set{dst=Dst,args=AArgs,op={bif,Op},anno=AAnno}=_I, Env0) -> ?DP("trying bif ~p:~n res: ~p <-> ~p~n args: ~p <-> ~p~n i: ~p~n", [Op, Result, Dst, PArgs, AArgs, _I]), - Env = check_annos(PAnno, AAnno, Env0), - op_check_call(Op, Result, Dst, PArgs, AArgs, Env); + Env = op_check_call(Op, Result, Dst, PArgs, AArgs, Env0), + check_annos(PAnno, AAnno, Env); op_check([none,{atom,_,ret}|PArgs], PAnno, - #b_ret{arg=AArg,anno=AAnno}=_I, Env0) -> + #b_ret{arg=AArg,anno=AAnno}=_I, Env) -> ?DP("trying return:, arg: ~p <-> ~p~n i: ~p~n", [PArgs, [AArg], _I]), - Env = 
check_annos(PAnno, AAnno, Env0), - post_args(PArgs, [AArg], Env); + check_annos(PAnno, AAnno, post_args(PArgs, [AArg], Env)); op_check([none,{atom,_,br}|PArgs], PAnno, #b_br{bool=ABool,succ=ASucc,fail=AFail,anno=AAnno}=_I, Env0) -> ?DP("trying br: arg: ~p <-> ~p~n i: ~p~n", [PArgs, [ABool,ASucc,AFail], _I]), - Env = check_annos(PAnno, AAnno, Env0), - post_args(PArgs, [ABool,#b_literal{val=ASucc},#b_literal{val=AFail}], Env); + Env = post_args(PArgs, + [ABool,#b_literal{val=ASucc},#b_literal{val=AFail}], Env0), + check_annos(PAnno, AAnno, Env); op_check([none,{atom,_,switch},PArg,PFail,{list,_,PArgs}], PAnno, #b_switch{arg=AArg,fail=AFail,list=AList,anno=AAnno}=_I, Env0) -> ?DP("trying switch: arg: ~p <-> ~p~n i: ~p~n", [PArgs, [AArg,AFail,AList], _I]), - Env1 = env_post(PArg, AArg, env_post(PFail, #b_literal{val=AFail}, Env0)), - Env = check_annos(PAnno, AAnno, Env1), - post_switch_args(PArgs, AList, Env); + Env = env_post(PArg, AArg, env_post(PFail, #b_literal{val=AFail}, Env0)), + check_annos(PAnno, AAnno, post_switch_args(PArgs, AList, Env)); op_check([label,PLbl], _Anno, {label,ALbl}, Env) when is_integer(ALbl) -> env_post(PLbl, #b_literal{val=ALbl}, Env). @@ -317,8 +316,8 @@ post_tuple([], [], Env) -> Env. post_map([{Key,Val}|Items], Map, Env) -> - K = build_map_key(Key), - V = build_map_key(Val), + K = build_map_key(Key, Env), + V = build_map_key(Val, Env), #{K := V} = Map, post_map(Items, maps:remove(K, Map), Env); @@ -326,38 +325,41 @@ post_map([], Map, Env) -> 0 = maps:size(Map), Env. 
-build_map_key({atom,_,A}) -> +build_map_key({atom,_,A}, _Env) -> A; -build_map_key({local_fun,{atom,_,N},{integer,_,A}}) -> +build_map_key({local_fun,{atom,_,N},{integer,_,A}}, _Env) -> #b_local{name=#b_literal{val=N},arity=A}; -build_map_key({integer,_,V}) -> +build_map_key({integer,_,V}, _Env) -> V; -build_map_key({float,_,V}) -> +build_map_key({float,_,V}, _Env) -> V; -build_map_key({binary,_,Bits}) -> +build_map_key({binary,_,Bits}, _Env) -> build_bitstring(Bits, <<>>); -build_map_key({list,_,Elems}) -> - build_map_key_list(Elems); -build_map_key({tuple,_,Elems}) -> - list_to_tuple([build_map_key(E) || E <- Elems]); -build_map_key({map,_,Elems}) -> - #{build_map_key(K) => build_map_key(V) || {K,V} <- Elems}; -build_map_key(_Key) -> +build_map_key({list,_,Elems}, Env) -> + build_map_key_list(Elems, Env); +build_map_key({tuple,_,Elems}, Env) -> + list_to_tuple([build_map_key(E, Env) || E <- Elems]); +build_map_key({map,_,Elems}, Env) -> + #{build_map_key(K, Env) => build_map_key(V, Env) || {K,V} <- Elems}; +build_map_key({var,_,V}, Env) -> + map_get(V, Env); +build_map_key(_Key, _Env) -> ?DP("Failed to match ~p~n", [_Key]), error({internal_pattern_match_error,build_map_key}). -build_map_key_list([E|Elems]) -> - [build_map_key(E)|build_map_key_list(Elems)]; -build_map_key_list([]) -> +build_map_key_list([E|Elems], Env) -> + [build_map_key(E, Env)|build_map_key_list(Elems, Env)]; +build_map_key_list([], _Env) -> []; -build_map_key_list(E) -> - build_map_key(E). +build_map_key_list(E, Env) -> + build_map_key(E, Env). 
check_annos([{term,{atom,_,Key},PTerm}|Patterns], Actual, Env0) -> - ?DP("Checking term anno ~p: ~p~nkeys: ~p~n", - [Key, PTerm, maps:keys(Actual)]), + ?DP("Checking term anno~n wanted anno-key ~p~n", [Key]), + ?DP(" actual anno keys ~p~n", [maps:keys(Actual)]), + ?DP(" pattern on selected anno ~p~n", [PTerm]), #{ Key := ATerm } = Actual, - ?DP("~p <-> ~p~n", [PTerm, ATerm]), + ?DP(" actual selected anno ~p~n", [ATerm]), Env = env_post(PTerm, #b_literal{val=ATerm}, Env0), ?DP("ok~n"), check_annos(Patterns, Actual, Env); diff --git a/lib/compiler/src/beam_ssa_codegen.erl b/lib/compiler/src/beam_ssa_codegen.erl index f3a009f3625d..af6ffb6d00d9 100644 --- a/lib/compiler/src/beam_ssa_codegen.erl +++ b/lib/compiler/src/beam_ssa_codegen.erl @@ -1374,6 +1374,14 @@ cg_block([#cg_set{op=has_map_field,dst=Dst0,args=Args0,anno=Anno}|T], Context, S {Is0,St} = cg_block(T, Context, St0), Is = [I|Is0], {Is,St}; +cg_block([#cg_set{op=update_record,dst=Dst0,args=Args0,anno=Anno}|T], Context, St0) -> + Args = typed_args(Args0, Anno, St0), + Dst = beam_arg(Dst0, St0), + [Hint,{integer,Size},Src|Ss0] = Args, + Ss = cg_update_record_list(Ss0, []), + I = {update_record,Hint,Size,Src,Dst,{list,Ss}}, + {Is1,St} = cg_block(T, Context, St0), + {[I|Is1],St}; cg_block([#cg_set{op=Op,dst=Dst0,args=Args0}=Set], none, St) -> [Dst|Args] = beam_args([Dst0|Args0], St), Is = cg_instr(Op, Args, Dst, Set), @@ -1884,10 +1892,7 @@ cg_instr(recv_marker_reserve, [], Dst) -> cg_instr(remove_message, [], _Dst) -> [remove_message]; cg_instr(resume, [A,B], _Dst) -> - [{bif,raise,{f,0},[A,B],{x,0}}]; -cg_instr(update_record, [Hint, {integer,Size}, Src | Ss0], Dst) -> - Ss = cg_update_record_list(Ss0, []), - [{update_record,Hint,Size,Src,Dst,{list,Ss}}]. + [{bif,raise,{f,0},[A,B],{x,0}}]. 
cg_test({float,Op0}, Fail, Args, Dst, #cg_set{anno=Anno}) -> Op = case Op0 of diff --git a/lib/compiler/src/beam_ssa_destructive_update.erl b/lib/compiler/src/beam_ssa_destructive_update.erl new file mode 100644 index 000000000000..3ca0c0d53a16 --- /dev/null +++ b/lib/compiler/src/beam_ssa_destructive_update.erl @@ -0,0 +1,908 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2024. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +%% This pass implements transforms which allow for safe destructive +%% term updates. +%% +%% The optimization is done in three phases: +%% +%% * Scan +%% +%% The module is scanned and instructions suitable for +%% transformation into a destructive form are identified +%% (find_applicable_instructions/2). +%% +%% * Initial-value-search +%% +%% When instructions are transformed to their destructive form, +%% their arguments may have to be modified in order to be suitable +%% for destructive update. For example, a literal <<>> may have to +%% be converted to a heap allocated term created by a +%% bs_writable_binary. Likewise an update_record instruction +%% returning a result which is input to another update_record +%% instruction with the inplace hint, cannot have the reuse hint, +%% as then there are no guarantees for a unique value. +%% +%% Identifying such terms, instructions and literals is done by +%% find_initial_values/3.
+%% +%% * Patching +%% +%% Given a set of instructions and literals to patch, the third +%% phase (patch_instructions/5) traverses the module and performs +%% the needed modifications. +%% +%% Currently this module implements the following transforms allowing +%% for destructive update: +%% +%% * Private append +%% +%% When a binary is grown by appending data to it using +%% `bs_create_bin`, a considerable performance improvement can be +%% achieved if the append can be done using the destructive +%% `private_append` instead of `append`. Using the `private_append` +%% flavor of `bs_create_bin` is only possible when the binary +%% being extended has been created by `bs_writable_binary` or by +%% another `bs_create_bin` using `private_append`. As +%% `private_append` is destructive, an additional requirement is +%% that there is only one live reference to the binary being +%% extended. +%% +%% This optimization implements a new SSA optimization pass which +%% finds suitable code sequences which iteratively grow binaries +%% starting with `<<>>` and rewrites them to start with an initial +%% value created by `bs_writable_binary` and use `private_append` +%% for `bs_create_bin`. +%% +%% * In-place update of tuples/records +%% +%% Starting with Erlang version 26, all tuples are updated using +%% the update_record instruction. Updating tuples in-place, when +%% safe to do so, can lead to performance improvements, both due +%% to less copying and due to less garbage. +%% +%% This optimization is implemented by looking for update_record +%% instructions, during the scan phase, where the source tuple +%% dies with the update and the source is unique as detected by +%% the alias analysis pass. During the patching phase these +%% instructions are given the `inplace` hint. +%% +%% During initial-value-search literal tuples are detected and +%% during the patching phase rewritten to be created using +%% put_tuple as literals cannot be updated in-place.
+ +-module(beam_ssa_destructive_update). +-moduledoc false. + +-export([opt/2]). + +-import(lists, [foldl/3, foldr/3, keysort/2, reverse/1]). + +-include("beam_ssa_opt.hrl"). +-include("beam_types.hrl"). + +%% -define(DEBUG, true). + +-ifdef(DEBUG). +-define(DP(FMT, ARGS), io:format(FMT, ARGS)). +-define(DP(FMT), io:format(FMT)). +ff(#b_local{name=#b_literal{val=N},arity=A}) -> + io_lib:format("~p/~p", [N,A]). +-else. +-define(DP(FMT, ARGS), skip). +-define(DP(FMT), skip). +-endif. + +-spec opt(st_map(), func_info_db()) -> {st_map(), func_info_db()}. +opt(StMap, FuncDb) -> + %% Ignore functions which are not in the function db (never + %% called). + Funs = [ F || F <- maps:keys(StMap), is_map_key(F, FuncDb)], + + %% Find instructions to transform from their 'functional' to their + %% 'destructive' form. + {Applicable,ValuesToTrack} = find_applicable_instructions(Funs, StMap), + ?DP("Found applicable instructions:~n~s", + [[io_lib:format(" in ~s\n~s", + [ff(F), + [io_lib:format(" ~p: ~p~n", [I,Info]) + || I:=Info <- PerInstruction]]) + || F:=PerInstruction <- Applicable]]), + ?DP("Found values to track:~n~s", + [[io_lib:format(" ~p in ~s: ~p~n", [Var,ff(F),Info]) + || {F,Var,Info} <- ValuesToTrack]]), + + %% Find initial values. + {InitialsToPatch,ForceCopy} = + find_initial_values(ValuesToTrack, StMap, FuncDb), + + %% Patch instructions and initial values. + patch_instructions(Applicable, InitialsToPatch, ForceCopy, StMap, FuncDb). + +find_applicable_instructions(Funs, StMap) -> + fai(Funs, #{}, [], StMap). + +fai([F|Funs], Instructions0, Values0, StMap) -> + #opt_st{ssa=Linear} = map_get(F, StMap), + {Instructions,Values} = fai_blk(Linear, F, Instructions0, Values0), + fai(Funs, Instructions, Values, StMap); +fai([], Instructions, Values, _StMap) -> + {Instructions,Values}. 
+ +fai_blk([{_Lbl,#b_blk{is=Is}}|Linear], F, Instructions0, Values0) -> + {Instructions,Values} = fai_is(Is, F, Instructions0, Values0), + fai_blk(Linear, F, Instructions, Values); +fai_blk([], _, Instructions, Values) -> + {Instructions,Values}. + +fai_is([I|Is], F, Instructions0, Values0) -> + {Instructions,Values} = fai_i(I, F, Instructions0, Values0), + fai_is(Is, F, Instructions, Values); +fai_is([], _F, Instructions, Values) -> + {Instructions,Values}. + +fai_i(#b_set{dst=Dst, op=bs_create_bin, + args=[#b_literal{val=append}, + _, + Lit=#b_literal{val= <<>>}|_]}, + F, Instructions0, Values) -> + %% Special case for when the first fragment is a literal <<>> as + %% it won't be annotated as unique nor will it die with the + %% instruction. + Instructions = + add_applicable_instruction(Dst, + {appendable_binary,Dst,Lit}, + F, Instructions0), + {Instructions,Values}; +fai_i(#b_set{dst=Dst, op=bs_create_bin, + args=[#b_literal{val=append},SegmentInfo,Var|_], + anno=#{first_fragment_dies:=true}=Anno}, + F, Instructions0, Values0) -> + case is_unique(Var, Anno) andalso is_appendable(Anno, SegmentInfo) of + true -> + Instructions = + add_applicable_instruction(Dst, + {appendable_binary,Dst,Var}, + F, Instructions0), + Values = add_tracked_var(Var, init_writable, F, Values0), + {Instructions,Values}; + false -> + {Instructions0,Values0} + end; +fai_i(#b_set{dst=Dst,op=update_record, + args=[_Hint,_Size,Src|_Updates], + anno=#{source_dies:=true}=Anno}, + F, Instructions0, Values0) -> + case is_unique(Src, Anno) of + true -> + Instructions = + add_applicable_instruction(Dst, + {tuple_update,Dst,Src}, + F, Instructions0), + Values = add_tracked_var(Src, heap_tuple, F, Values0), + {Instructions,Values}; + false -> + {Instructions0,Values0} + end; +fai_i(_I, _F, Instructions, Values) -> + {Instructions,Values}. 
+ +add_applicable_instruction(Instr, Info, F, Instructions0) -> + PerFun0 = maps:get(F, Instructions0, #{}), + PerInstruction = maps:get(Instr, PerFun0, []), + PerFun = PerFun0#{Instr=>[Info|PerInstruction]}, + Instructions0#{F=>PerFun}. + +add_tracked_var(Var, Info, F, Vars) -> + [{F,Var,Info}|Vars]. + +-record(fiv_st, + { + funcdb, + stmap, + defsdb = #{}, + literals = #{}, + valuesdb = #{}, + force_copy = #{} + }). + +find_initial_values(ValuesToTrack, StMap, FuncDb) -> + fiv(ValuesToTrack, #fiv_st{funcdb=FuncDb,stmap=StMap}). + +fiv([{Fun,Dst,Kind}|Work], FivSt0=#fiv_st{stmap=StMap}) -> + #{Fun:=#opt_st{ssa=SSA,args=Args}} = StMap, + {DefsInFun,FivSt} = fiv_defs_in_fun(Fun, Args, SSA, FivSt0), + ValuesInFun = fiv_values_in_fun(Fun, FivSt), + ?DP("*** tracking ~p in: ~s ***~n", [Kind, ff(Fun)]), + fiv_track_value_in_fun([{Dst,{self,Kind}}], Fun, Work, + DefsInFun, ValuesInFun, FivSt); +fiv([{Fun,{track_call_argument,Callee,Element,Idx}}|Work], + FivSt0=#fiv_st{stmap=StMap}) -> + #{Fun:=#opt_st{ssa=SSA,args=Args}} = StMap, + ?DP("*** Tracking ~p of the ~p:th argument in call to ~s" + " in the function ~s ***~n", [Element, Idx, ff(Callee), ff(Fun)]), + {DefsInFun,FivSt1} = fiv_defs_in_fun(Fun, Args, SSA, FivSt0), + ValuesInFun = fiv_values_in_fun(Fun, FivSt1), + {Vars,FivSt} = + fiv_get_call_arguments(Callee, Element, Idx, DefsInFun, Fun, FivSt1), + ?DP(" Vars to track: ~p~n", [Vars]), + fiv_track_value_in_fun(Vars, Fun, Work, DefsInFun, ValuesInFun, FivSt); +fiv([{Fun,{track_result,Element}}|Work], FivSt0=#fiv_st{stmap=StMap}) -> + #{Fun:=#opt_st{ssa=SSA,args=Args}} = StMap, + {DefsInFun,FivSt1} = fiv_defs_in_fun(Fun, Args, SSA, FivSt0), + ValuesInFun = fiv_values_in_fun(Fun, FivSt0), + ?DP("*** Tracking ~p of the result of ~s ***~n", [Element, ff(Fun)]), + {Results,FivSt} = fiv_get_results(SSA, Element, Fun, FivSt1), + ?DP("values to track inside the function: ~p~n", [Results]), + fiv_track_value_in_fun(Results, Fun, Work, DefsInFun, ValuesInFun, FivSt); 
+fiv([], FivSt) -> + {FivSt#fiv_st.literals,FivSt#fiv_st.force_copy}. + +patch_instructions(Applicable, InitialsToPatch, ForceCopy, StMap0, FuncDb) -> + ?DP("Instructions to patch:~n ~p~n", [Applicable]), + ?DP("Initial values to patch :~n ~p~n", [InitialsToPatch]), + ?DP("Force copy :~n ~p~n", [ForceCopy]), + %% Merge instructions and initial values so we only get one map + %% per fuctions which is indexed on the variable. + Merge = + fun(A, B) -> + maps:fold(fun(VarOrLbl, Info0, Acc) -> + Info = Info0++maps:get(VarOrLbl, Acc, []), + Acc#{VarOrLbl => Info} + end, A, B) + end, + Patches0 = maps:fold(fun(Fun, Initials, Acc) -> + InitialsInFun = Merge(Initials, + maps:get(Fun, Acc, #{})), + Acc#{Fun => InitialsInFun} + end, Applicable, InitialsToPatch), + Patches = maps:fold(fun(Fun, Initials, Acc) -> + InitialsInFun = Merge(Initials, + maps:get(Fun, Acc, #{})), + Acc#{Fun => InitialsInFun} + end, Patches0, ForceCopy), + ?DP("Patches:~n ~p~n", [Patches]), + StMap = maps:fold(fun(Fun, Ps, StMapAcc) -> + OptSt=#opt_st{ssa=SSA0,cnt=Cnt0} = + map_get(Fun, StMapAcc), + {SSA,Cnt} = patch_f(SSA0, Cnt0, Ps), + StMapAcc#{Fun => OptSt#opt_st{ssa=SSA,cnt=Cnt}} + end, StMap0, Patches), + {StMap, FuncDb}. + +is_unique(Var, Anno) -> + ordsets:is_element(Var, maps:get(unique, Anno, [])). + +is_appendable(Anno, #b_literal{val=[SegmentUnit|_]}) + when is_integer(SegmentUnit) -> + case Anno of + #{arg_types:=#{2:=#t_bitstring{appendable=true,size_unit=SizeUnit}}} -> + SizeUnit rem SegmentUnit == 0; + _ -> + false + end. + +%% Find all variables which are returned and return them in a worklist +fiv_get_results(SSA, Element, Fun, FivSt) -> + fiv_get_results(SSA, [], Element, Fun, FivSt). 
+
+%% Walk the blocks of the function. A returned variable is added to
+%% the worklist as {Var,Element}; a returned literal is handed to
+%% fiv_add_literal/5 as a {ret,Lbl,Element} patch; all other blocks
+%% are skipped. Returns {Worklist, FivSt}.
+fiv_get_results([{_,#b_blk{last=#b_ret{arg=#b_var{}=V}}}|Rest],
+                Acc, Element, Fun, FivSt) ->
+    fiv_get_results(Rest, [{V,Element}|Acc], Element, Fun, FivSt);
+fiv_get_results([{Lbl,#b_blk{last=#b_ret{arg=#b_literal{val=Lit}}}}|Rest],
+                Acc, Element, Fun, FivSt0) ->
+    FivSt = fiv_add_literal(Lit, Element, Fun, {ret,Lbl,Element}, FivSt0),
+    fiv_get_results(Rest, Acc, Element, Fun, FivSt);
+fiv_get_results([_|Rest], Acc, Element, Fun, FivSt) ->
+    fiv_get_results(Rest, Acc, Element, Fun, FivSt);
+fiv_get_results([], Acc, _, _Fun, FivSt) ->
+    {Acc, FivSt}.
+
+%% Process the per-function worklist of {Value,Element} pairs for
+%% Fun. A variable which has not been visited is looked up in Defs
+%% and tracking continues according to the operation defining it (or
+%% into the callers when it is a function argument). Literals in the
+%% worklist are skipped. When the worklist is empty, the visited
+%% values are stored in the valuesdb and processing continues with
+%% the global worklist Work through fiv/2.
+fiv_track_value_in_fun([{#b_var{}=V,Element}|Rest], Fun, Work,
+                       Defs, ValuesInFun, FivSt0)
+  when is_map_key({V,Element}, ValuesInFun) ->
+    %% We have already explored this value.
+    ?DP("We have already explored ~p of ~p in ~s~n", [Element, V, ff(Fun)]),
+    fiv_track_value_in_fun(Rest, Fun, Work, Defs, ValuesInFun, FivSt0);
+fiv_track_value_in_fun([{#b_var{}=V,Element}|Rest], Fun, Work0, Defs,
+                       ValuesInFun0, FivSt0=#fiv_st{}) ->
+    ?DP("Tracking ~p of ~p in fun ~s~n", [Element, V, ff(Fun)]),
+    ValuesInFun = ValuesInFun0#{{V,Element}=>visited},
+    case Defs of
+        #{V:=#b_set{dst=V,op=Op,args=Args}} ->
+            case {Op,Args,Element} of
+                {bs_create_bin,[#b_literal{val=append},_,Arg|_],
+                 {self,init_writable}} ->
+                    ?DP("value is created by append, adding ~p.~n",
+                        [{Arg,self}]),
+                    fiv_track_value_in_fun([{Arg,Element}|Rest], Fun, Work0,
+                                           Defs, ValuesInFun, FivSt0);
+                {bs_create_bin,[#b_literal{val=private_append},_,_|_],
+                 {self,init_writable}} ->
+                    ?DP("value is created by private_append.~n"),
+                    %% If the code has already been rewritten to use
+                    %% private_append, tracking the accumulator to
+                    %% ensure that it is writable has already
+                    %% been seen to, so no need to track it.
+                    fiv_track_value_in_fun(Rest, Fun, Work0,
+                                           Defs, ValuesInFun, FivSt0);
+                {bs_init_writable,_,{self,init_writable}} ->
+                    ?DP("value is created by bs_init_writable.~n"),
+                    %% bs_init_writable creates a writable binary, so
+                    %% we are done.
+                    fiv_track_value_in_fun(Rest, Fun, Work0,
+                                           Defs, ValuesInFun, FivSt0);
+                {call,[#b_local{}=Callee|_Args],_} ->
+                    ?DP("value is created by local call to ~s.~n",
+                        [ff(Callee)]),
+                    fiv_track_value_into_call(Callee, Element, Fun, Rest, Work0,
+                                              Defs, ValuesInFun, FivSt0);
+                {call,[#b_remote{mod=#b_literal{val=erlang},
+                                 name=#b_literal{val=error},
+                                 arity=1}|_Args],_} ->
+                    ?DP("value is from non-returning external call.~n"),
+                    %% As erlang:error/1 never returns, we shouldn't
+                    %% try to track this value.
+                    fiv_track_value_in_fun(Rest, Fun, Work0,
+                                           Defs, ValuesInFun, FivSt0);
+                {get_hd,[List],_} ->
+                    %% We only handle the case when the tracked value
+                    %% is in the head field of a cons. This is due to
+                    %% the type analyser always assuming that a cons
+                    %% is part of a list and therefore we will never
+                    %% be able to safely rewrite an accumulator in the
+                    %% tail field of the cons, thus we will never
+                    %% have to track it.
+                    ?DP("value is created by a get_hd, adding ~p.~n",
+                        [{List,{hd,Element}}]),
+                    fiv_track_value_in_fun(
+                      [{List,{hd,Element}}|Rest], Fun, Work0,
+                      Defs, ValuesInFun, FivSt0);
+                {get_tuple_element,[#b_var{}=Tuple,#b_literal{val=Idx}],_} ->
+                    ?DP("value is created by a get_tuple_element, adding ~p.~n",
+                        [{Tuple,{tuple_element,Idx,Element}}]),
+                    fiv_track_value_in_fun(
+                      [{Tuple,{tuple_element,Idx,Element}}|Rest], Fun, Work0,
+                      Defs, ValuesInFun, FivSt0);
+                {phi,_,_} ->
+                    ?DP("value is created by a phi~n"),
+                    {ToExplore,FivSt} = fiv_handle_phi(Fun, V, Args,
+                                                       Element, FivSt0),
+                    fiv_track_value_in_fun(ToExplore ++ Rest, Fun, Work0,
+                                           Defs, ValuesInFun, FivSt);
+                {put_tuple,_,_} ->
+                    ?DP("value is created by a put tuple.~n"),
+                    fiv_track_put_tuple(Args, Element, Rest, Fun, V, Work0,
+                                        Defs, ValuesInFun, FivSt0);
+                {put_list,_,_} ->
+                    ?DP("value is created by a put list.~n"),
+                    fiv_track_put_list(Args, Element, Rest, Fun, V, Work0,
+                                       Defs, ValuesInFun, FivSt0);
+                {update_record,_,_} ->
+                    ?DP("value is created by a update_record.~n"),
+                    fiv_track_update_record(Args, Element, Rest, Fun, V, Work0,
+                                            Defs, ValuesInFun, FivSt0);
+                {_,_,_} ->
+                    %% Above we have handled all operations through
+                    %% which we are able to track the value to its
+                    %% construction. All other operations are from
+                    %% execution paths not reachable when the actual
+                    %% type (at runtime) is a relevant bitstring.
+                    %% Thus we can safely abort the tracking here.
+                    %%
+                    %% Note: That the bif element/2 is not handled is
+                    %% not an omission. When the element index is
+                    %% statically known, the CSE pass will convert the
+                    %% bif to the instruction get_tuple_element. When
+                    %% not known, the default action is correct. The
+                    %% same reasoning applies to hd, tl, and map_get.
+                    ?DP("value is created by unknown instruction.~n"),
+                    fiv_track_value_in_fun(Rest, Fun, Work0,
+                                           Defs, ValuesInFun, FivSt0)
+            end;
+        #{V:={arg,Idx}} ->
+            ?DP("value is function argument.~n"),
+            fiv_track_value_into_caller(Element, Idx, Rest, Fun, Work0, Defs,
+                                        ValuesInFun, FivSt0)
+    end;
+fiv_track_value_in_fun([{#b_literal{},_}|Rest], Fun, Work,
+                       Defs, ValuesInFun, FivSt) ->
+    fiv_track_value_in_fun(Rest, Fun, Work, Defs, ValuesInFun, FivSt);
+fiv_track_value_in_fun([], Fun, Work, _Defs, ValuesInFun,
+                       FivSt0=#fiv_st{valuesdb=ValuesDb0}) ->
+    %% We are done with this function. Store the result in the
+    %% valuesdb and continue with the work list.
+    FivSt = FivSt0#fiv_st{valuesdb=ValuesDb0#{Fun=>ValuesInFun}},
+    fiv(Work, FivSt).
+
+%% The tracked value is the result of a local call: queue a
+%% {Callee,{track_result,Element}} item on the global worklist and
+%% continue with the caller's own worklist.
+fiv_track_value_into_call(Callee, Element, CallerFun, CallerWork, GlobalWork0,
+                          CallerDefs, CallerValuesInFun, FivSt0) ->
+    GlobalWork = [{Callee, {track_result, Element}}|GlobalWork0],
+    fiv_track_value_in_fun(CallerWork, CallerFun, GlobalWork,
+                           CallerDefs, CallerValuesInFun, FivSt0).
+
+%% The tracked value is argument number ArgIdx of CalledFun: queue a
+%% {Caller,{track_call_argument,CalledFun,Element,ArgIdx}} item on
+%% the global worklist for every caller still present in the stmap,
+%% then continue with the called function's own worklist.
+fiv_track_value_into_caller(Element, ArgIdx,
+                            CalledFunWorklist, CalledFun,
+                            GlobalWorklist0,
+                            CalledFunDefs, CalledFunValues,
+                            FivSt0=#fiv_st{funcdb=FuncDb,stmap=StMap}) ->
+    #func_info{in=Callers} = map_get(CalledFun, FuncDb),
+    ?DP("Track into callers of ~s, tracking arg-idx:~p, ~p~n callers:~s~n",
+        [ff(CalledFun), ArgIdx, Element,
+         string:join([ff(C) || C <- Callers],", ")]),
+    %% The caller information in func_info does not remove a caller
+    %% when it is inlined into another function (although the new
+    %% caller is added), so we have to filter out functions which lack
+    %% entries in the st_map (as they are dead, they have been removed
+    %% from the stmap).
+    Work = [{Caller,{track_call_argument,CalledFun,Element,ArgIdx}}
+            || Caller <- Callers, is_map_key(Caller, StMap)],
+    GlobalWorklist = Work ++ GlobalWorklist0,
+    fiv_track_value_in_fun(CalledFunWorklist, CalledFun, GlobalWorklist,
+                           CalledFunDefs, CalledFunValues, FivSt0).
+
+%% Continue tracking through a put_tuple. FieldVars are the arguments
+%% of the put_tuple and Dst is its destination variable. Idx in a
+%% {tuple_element,Idx,_} element is zero-based.
+fiv_track_put_tuple(FieldVars, {tuple_element,Idx,_},
+                    Work, Fun, _Dst, GlobalWork,
+                    Defs, ValuesInFun, FivSt) when length(FieldVars) =< Idx ->
+    %% The value we are tracking was constructed by a put tuple, but
+    %% it can't be this put_tuple as it has too few elements.
+    fiv_track_value_in_fun(Work, Fun, GlobalWork,
+                           Defs, ValuesInFun, FivSt);
+fiv_track_put_tuple(FieldVars, {tuple_element,Idx,Element},
+                    Work, Fun, Dst, GlobalWork,
+                    Defs, ValuesInFun, FivSt0) ->
+    %% The value we are tracking was constructed by a put tuple and we
+    %% are interested in continuing the tracking of the field
+    case lists:nth(Idx + 1, FieldVars) of
+        ToTrack = #b_var{} ->
+            fiv_track_value_in_fun([{ToTrack,Element}|Work], Fun, GlobalWork,
+                                   Defs, ValuesInFun, FivSt0);
+        #b_literal{val=Lit} ->
+            FivSt = fiv_add_literal(Lit, Element,
+                                    Fun, {opargs,Dst,Idx,Lit,Element},
+                                    FivSt0),
+            fiv_track_value_in_fun(Work, Fun, GlobalWork,
+                                   Defs, ValuesInFun, FivSt)
+    end;
+fiv_track_put_tuple(_FieldVars, _,
+                    Work, Fun, _Dst, GlobalWork,
+                    Defs, ValuesInFun, DefSt) ->
+    %% As the tracked element isn't a tuple element, this is an
+    %% execution path which isn't type compatible, stop tracking.
+    fiv_track_value_in_fun(Work, Fun, GlobalWork,
+                           Defs, ValuesInFun, DefSt).
+
+%% Continue tracking through an update_record. The first argument is
+%% the instruction's argument list, [Hint,Size,Src|Updates], and Dst
+%% is its destination variable.
+fiv_track_update_record([#b_literal{val=copy}|_],
+                        {self,heap_tuple},
+                        Work, Fun, _Dst, GlobalWork,
+                        Defs, ValuesInFun, FivSt) ->
+    %% The value we are tracking was constructed by an update_record,
+    %% as the hint is 'copy', no further tracking is needed.
+    ?DP("Value is on heap and unique"),
+    fiv_track_value_in_fun(Work, Fun, GlobalWork,
+                           Defs, ValuesInFun, FivSt);
+fiv_track_update_record([#b_literal{val=reuse}|_],
+                        {self,heap_tuple},
+                        Work, Fun, Dst, GlobalWork,
+                        Defs, ValuesInFun, #fiv_st{force_copy=FC0}=FivSt0) ->
+    %% The value we are tracking was constructed by an update_record,
+    %% but as the hint is 'reuse', the instruction has to be patched
+    %% to use 'copy' as otherwise the uniqueness of the result is not
+    %% guaranteed.
+    ?DP("Value is on heap but not unique~n"),
+    %% Record a {force_copy,Dst} patch for Dst in the per-function
+    %% force_copy map.
+    FunFC0 = maps:get(Fun, FC0, #{}),
+    ThisDst0 = maps:get(Dst, FunFC0, []),
+    ThisDst = ordsets:add_element({force_copy,Dst}, ThisDst0),
+    FunFC = FunFC0#{Dst=>ThisDst},
+    FC = FC0#{Fun=>FunFC},
+    FivSt = FivSt0#fiv_st{force_copy=FC},
+    fiv_track_value_in_fun(Work, Fun, GlobalWork,
+                           Defs, ValuesInFun, FivSt);
+fiv_track_update_record([_Hint,_Size,Src|Updates],
+                        {tuple_element,Idx,Element}=What,
+                        Work, Fun, Dst, GlobalWork,
+                        Defs, ValuesInFun, FivSt0) ->
+    ?DP("Looking for idx: ~p among ~p~n", [Idx,Updates]),
+    %% The update list is searched for Idx+1, i.e. the zero-based
+    %% tracked index converted to the index used by the updates.
+    case fiv_get_update(Idx+1, Updates) of
+        {#b_var{}=ToTrack,_} ->
+            ?DP("Tracked value is among the updates~n"),
+            fiv_track_value_in_fun([{ToTrack,Element}|Work], Fun, GlobalWork,
+                                   Defs, ValuesInFun, FivSt0);
+        {#b_literal{val=Lit},ArgNo} ->
+            ?DP("Tracked literal value is among the updates,"
+                " it is in argument ~p~n", [ArgNo]),
+            FivSt = fiv_add_literal(Lit, Element,
+                                    Fun, {opargs,Dst,ArgNo,Lit,Element},
+                                    FivSt0),
+            fiv_track_value_in_fun(Work, Fun, GlobalWork,
+                                   Defs, ValuesInFun, FivSt);
+        none ->
+            ?DP("Tracked value is not among the updates~n"),
+            %% The element is not updated here, so it has to be
+            %% tracked in the source tuple instead.
+            fiv_track_value_in_fun([{Src,What}|Work], Fun, GlobalWork,
+                                   Defs, ValuesInFun, FivSt0)
+    end.
+
+%% Search the flat [Index,Value,...] update list of an update_record
+%% for the index Idx. Returns {Value,ArgNo}, where ArgNo is the
+%% zero-based position of Value in the instruction's argument list
+%% (the first index literal is argument 3, after the hint, size and
+%% source), or `none` if there is no update for Idx.
+fiv_get_update(Idx, Updates) ->
+    fiv_get_update(Idx, Updates, 3).
+
+fiv_get_update(Idx, [#b_literal{val=Idx},Val|_Updates], ArgNo) ->
+    {Val,ArgNo+1};
+fiv_get_update(Idx, [#b_literal{},_|Updates], ArgNo) ->
+    fiv_get_update(Idx, Updates, ArgNo+2);
+fiv_get_update(_, [], _) ->
+    none.
+
+%% Continue tracking through a put_list. The first argument is the
+%% [Hd,Tl] argument list of the put_list and Dst is its destination
+%% variable.
+fiv_track_put_list([Hd,_Tl], {hd,Element},
+                   Work, Fun, Dst, GlobalWork,
+                   Defs, ValuesInFun, FivSt0) ->
+    %% The value we are tracking was constructed by a put list and we
+    %% are interested in continuing the tracking of the field. We only
+    %% handle the case when the tracked value is in the head field of
+    %% a cons. This is due to the type analyser always assuming that a
+    %% cons is part of a list and therefore we will never be able to
+    %% safely rewrite an accumulator in the tail field of the cons,
+    %% thus we will never have to track it.
+    case Hd of
+        #b_var{} ->
+            fiv_track_value_in_fun([{Hd,Element}|Work], Fun, GlobalWork,
+                                   Defs, ValuesInFun, FivSt0);
+        #b_literal{val=Lit} ->
+            FivSt = fiv_add_literal(Lit, Element,
+                                    Fun, {opargs,Dst,0,Lit,Element}, FivSt0),
+            fiv_track_value_in_fun(Work, Fun, GlobalWork, Defs,
+                                   ValuesInFun, FivSt)
+    end;
+fiv_track_put_list([_Hd,_Tl], _, Work, Fun, _Dst, GlobalWork,
+                   Defs, ValuesInFun, DefSt) ->
+    %% As the tracked element isn't a list element, this is an
+    %% execution path which isn't type compatible, stop tracking.
+    fiv_track_value_in_fun(Work, Fun, GlobalWork, Defs, ValuesInFun, DefSt).
+
+%% Find all calls to Callee and produce a work-list containing all
+%% values which are used as the Idx:th argument.
+%% Returns {Worklist, FivSt}.
+fiv_get_call_arguments(Callee, Element, Idx, Defs, Fun, FivSt0) ->
+    %% We traverse all defs inside the caller to find the calls.
+    maps:fold(fun(_, #b_set{dst=Dst,op=call,args=[Target|Args]}, {Acc,FivSt})
+                    when Callee =:= Target ->
+                      {Values,FivSt1} =
+                          fiv_gca(Args, Element, Idx, Fun, Dst, FivSt),
+                      {Values ++ Acc, FivSt1};
+                 (_, _, Acc) ->
+                      Acc
+              end, {[], FivSt0}, Defs).
+
+%% Pick out argument number Idx (zero-based, the callee excluded)
+%% from the arguments of a call. A variable is returned as a
+%% one-element worklist; a literal is registered as an opargs patch
+%% with index I+1, as the callee is argument 0 of the call
+%% instruction.
+fiv_gca(Args, Element, Idx, Fun, Dst, FivSt) ->
+    fiv_gca(Args, 0, Element, Idx, Fun, Dst, FivSt).
+
+fiv_gca([#b_var{}=V|_], I, Element, I, _Fun, _Dst, FivSt) ->
+    %% This is the argument we are tracking.
+    {[{V,Element}], FivSt};
+fiv_gca([#b_literal{val=Lit}|_], I, Element, I, Fun, Dst, FivSt) ->
+    {[], fiv_add_literal(Lit, Element, Fun, {opargs,Dst,I+1,Lit,Element}, FivSt)};
+fiv_gca([_|Args], I, Element, Idx, Fun, Dst, FivSt) ->
+    fiv_gca(Args, I + 1, Element, Idx, Fun, Dst, FivSt).
+
+%% Split the arguments of the phi defining Dst. Variable arguments
+%% are returned as {Var,Element} items to explore; literal arguments
+%% are registered through fiv_add_literal/5 as
+%% {phi,Dst,Lbl,Lit,Element} patches. Returns {ToExplore, FivSt}.
+fiv_handle_phi(Fun, Dst, Args, Element, FivSt0) ->
+    foldl(fun({#b_literal{val=Lit},Lbl}, {Acc,FivStAcc0}) ->
+                  FivStAcc =
+                      fiv_add_literal(Lit, Element,
+                                      Fun, {phi,Dst,Lbl,Lit,Element},
+                                      FivStAcc0),
+                  {Acc, FivStAcc};
+             ({V=#b_var{},_Lbl}, {Acc,FivStAcc}) ->
+                  ?DP("will explore ~p: ~p~n", [V,Element]),
+                  {[{V,Element}|Acc],FivStAcc}
+          end, {[],FivSt0}, Args).
+
+%% Cache calculation of the defs for a function so we only do it once.
+%% The returned map also maps each function argument to {arg,Idx},
+%% where Idx is the zero-based position of the argument in Args.
+fiv_defs_in_fun(Fun, Args, SSA, FivSt=#fiv_st{defsdb=DefsDb}) ->
+    case DefsDb of
+        #{Fun:=Defs} ->
+            {Defs, FivSt};
+        #{} ->
+            BlockMap = maps:from_list(SSA),
+            Labels = maps:keys(BlockMap),
+            Defs0 = beam_ssa:definitions(Labels, BlockMap),
+            {Defs,_} = foldl(fun(Arg, {Acc,Idx}) ->
+                                     {Acc#{Arg => {arg,Idx}}, Idx + 1}
+                             end, {Defs0,0}, Args),
+            {Defs, FivSt#fiv_st{defsdb=DefsDb#{Fun=>Defs}}}
+    end.
+
+%% Look up what we know about the values in Fun.
+fiv_values_in_fun(Fun, #fiv_st{valuesdb=ValuesDb}) ->
+    maps:get(Fun, ValuesDb, #{}).
+
+%% Add the LitInfo to the database of literals to patch if Lit is
+%% compatible with Element.
+%%
+%% As tracking doesn't make any attempt to use type information to
+%% exclude execution paths not relevant when tracking an appendable
+%% binary, it can happen that we encounter literals which do not match
+%% the type of the element. We can safely ignore the literal in that
+%% case.
+fiv_add_literal(Lit, Element, Fun, LitInfo, FivSt=#fiv_st{literals=Ls}) ->
+    case fiv_are_lit_and_element_compatible(Lit, Element) of
+        true ->
+            PerFun0 = maps:get(Fun, Ls, #{}),
+            %% The patches of a function are keyed on the second
+            %% element of the patch tuple.
+            Key = element(2, LitInfo),
+            PerKey = maps:get(Key, PerFun0, []),
+            %% We only want to add the same literal once.
+            ?DP("~s literal ~p in ~s~n",
+                [case ordsets:is_element(LitInfo, PerKey) of
+                     true ->
+                         "Ignoring already tracked";
+                     false ->
+                         "Adding"
+                 end,LitInfo,ff(Fun)]),
+            PerFun = PerFun0#{Key => ordsets:add_element(LitInfo, PerKey)},
+            FivSt#fiv_st{literals=Ls#{Fun => PerFun}};
+        false ->
+            FivSt
+    end.
+
+%%
+%% Return true if the literal is compatible with the element.
+%%
+%% A {tuple_element,Idx,E} element requires a tuple with more than
+%% Idx elements whose element at the zero-based index Idx is
+%% compatible with E; {hd,E} requires a non-empty list whose head is
+%% compatible with E; {self,heap_tuple} requires a tuple and
+%% {self,init_writable} a bitstring. Anything else is incompatible.
+fiv_are_lit_and_element_compatible(Lit, Element) ->
+    case Element of
+        {tuple_element,Idx,E}
+          when is_tuple(Lit), erlang:tuple_size(Lit) > Idx ->
+            fiv_are_lit_and_element_compatible(erlang:element(Idx + 1, Lit), E);
+        {self,heap_tuple} ->
+            is_tuple(Lit);
+        {self,init_writable} ->
+            is_bitstring(Lit);
+        {hd,E} when is_list(Lit), (Lit =/= []) ->
+            [L|_] = Lit,
+            fiv_are_lit_and_element_compatible(L, E);
+        _ ->
+            false
+    end.
+
+%% Apply the patches in Patches to the linearised SSA code SSA0.
+%% Patches keyed on a block label are applied to the terminator of
+%% that block; the remaining patches are passed on to patch_is/5.
+%% Returns {Linear, Cnt}.
+patch_f(SSA0, Cnt0, Patches) ->
+    patch_f(SSA0, Cnt0, Patches, [], []).
+
+patch_f([{Lbl,Blk=#b_blk{is=Is0,last=Last0}}|Rest],
+        Cnt0, PD0, Acc0, BlockAdditions0) ->
+    {Last,Extra,Cnt2,PD} =
+        case PD0 of
+            #{ Lbl := Patches } ->
+                {Last1,Extra0,Cnt1} = patch_ret(Last0, Patches, Cnt0),
+                {Last1,reverse(Extra0),Cnt1,maps:remove(Lbl, PD0)};
+            #{} ->
+                {Last0,[],Cnt0,PD0}
+        end,
+    {Is,Cnt,BlockAdditions} = patch_is(Is0, PD, Cnt2, [], []),
+    %% Instructions created for the terminator are placed last in the
+    %% block. The blocks are accumulated in reverse order.
+    Acc = [{Lbl,Blk#b_blk{is=Is++Extra,last=Last}}|Acc0],
+    patch_f(Rest, Cnt, PD, Acc, BlockAdditions++BlockAdditions0);
+patch_f([], Cnt, _PD, Acc, BlockAdditions) ->
+    ?DP("BlockAdditions: ~p~n", [BlockAdditions]),
+    %% insert_block_additions/3 also restores the block order.
+    Linear = insert_block_additions(Acc, maps:from_list(BlockAdditions), []),
+    ?DP("SSA-result:~n~p~n", [Linear]),
+    {Linear, Cnt}.
+
+%% Patch the instructions of a block. PD0 maps a destination
+%% variable to the list of patches for the instruction defining it.
+%% Acc accumulates the resulting instructions in reverse order.
+%% Returns {Is, Cnt, BlockAdditions}, where BlockAdditions is the
+%% list of {Lbl,Instructions} pairs produced when patching phis.
+patch_is([I0=#b_set{dst=Dst}|Rest], PD0, Cnt0, Acc, BlockAdditions0)
+  when is_map_key(Dst, PD0) ->
+    #{ Dst := Patches } = PD0,
+    PD = maps:remove(Dst, PD0),
+    case Patches of
+        [{opargs,Dst,_,_,_}|_] ->
+            OpArgs = [{Idx,Lit,Element}
+                      || {opargs,D,Idx,Lit,Element} <- Patches, Dst =:= D],
+            Forced = [ F || {force_copy,_}=F <- Patches],
+            I1 = case Forced of
+                     [] ->
+                         I0;
+                     _ ->
+                         no_reuse(I0)
+                 end,
+            %% Assert that all patches are either opargs patches for
+            %% Dst or force_copy patches.
+            0 = length(Patches) - length(Forced) - length(OpArgs),
+            Ps = keysort(1, OpArgs),
+            {Is,Cnt} = patch_opargs(I1, Ps, Cnt0),
+            patch_is(Rest, PD, Cnt, Is++Acc, BlockAdditions0);
+        [{appendable_binary,Dst,#b_literal{val= <<>>}=Lit}] ->
+            %% Special case for when the first fragment is a literal
+            %% <<>> and it has to be replaced with a bs_init_writable.
+            #b_set{op=bs_create_bin,dst=Dst,args=Args0}=I0,
+            [#b_literal{val=append},SegInfo,Lit|OtherArgs] = Args0,
+            {V,Cnt} = new_var(Cnt0),
+            Init = #b_set{op=bs_init_writable,dst=V,args=[#b_literal{val=256}]},
+            I = I0#b_set{args=[#b_literal{val=private_append},
+                               SegInfo,V|OtherArgs]},
+            patch_is(Rest, PD, Cnt, [I,Init|Acc], BlockAdditions0);
+        [{appendable_binary,Dst,_}] ->
+            %% Only the flavor of the bs_create_bin has to be changed.
+            #b_set{op=bs_create_bin,dst=Dst,args=Args0}=I0,
+            [#b_literal{val=append}|OtherArgs] = Args0,
+            I = I0#b_set{args=[#b_literal{val=private_append}|OtherArgs]},
+            patch_is(Rest, PD, Cnt0, [I|Acc], BlockAdditions0);
+        [{phi,Dst,_,_,_}|_] ->
+            {I, Extra, Cnt} = patch_phi(I0, Patches, Cnt0),
+            patch_is(Rest, PD, Cnt, [I|Acc], Extra++BlockAdditions0);
+        [{tuple_update,Dst,_Src}|Other] ->
+            %% Rewrite the instruction and process it again with the
+            %% remaining patches.
+            I = set_inplace(I0),
+            patch_is([I|Rest], PD#{Dst=>Other}, Cnt0, Acc, BlockAdditions0);
+        [{force_copy,Dst}|Other] ->
+            %% Rewrite the instruction and process it again with the
+            %% remaining patches.
+            patch_is([no_reuse(I0)|Rest], PD#{Dst=>Other},
+                     Cnt0, Acc, BlockAdditions0);
+        [] ->
+            patch_is(Rest, PD, Cnt0, [I0|Acc], BlockAdditions0)
+    end;
+patch_is([I|Rest], PD, Cnt, Acc, BlockAdditions) ->
+    patch_is(Rest, PD, Cnt, [I|Acc], BlockAdditions);
+patch_is([], _, Cnt, Acc, BlockAdditions) ->
+    {reverse(Acc), Cnt, BlockAdditions}.
+
+%% Replace the hint (the first argument) of the instruction with the
+%% literal `inplace`.
+set_inplace(#b_set{dst=_Dst,args=[_Hint,Size,Src|Updates]}=I0) ->
+    ?DP("Setting ~p to inplace~n", [_Dst]),
+    I0#b_set{args=[#b_literal{val=inplace},Size,Src|Updates]}.
+
+%% Change the hint of the instruction from `reuse` to `copy`. An
+%% instruction whose first argument isn't the literal `reuse` is
+%% returned unchanged.
+no_reuse(#b_set{dst=_Dst,args=[#b_literal{val=reuse},Size,Src|Updates]} = I0) ->
+    ?DP("Setting ~p to copy~n", [_Dst]),
+    I0#b_set{args=[#b_literal{val=copy},Size,Src|Updates]};
+no_reuse(I) ->
+    I.
+
+%% The only time when we patch a return is when it returns a
+%% literal.
+%% Returns {Last, Extra, Cnt}, where Extra is the list of
+%% instructions which build the value now returned by Last.
+patch_ret(Last=#b_ret{arg=#b_literal{val=Lit}}, Patches, Cnt0) ->
+    ?DP("patch_appends_ret:~n lit: ~p~n Patches: ~p~n", [Lit, Patches]),
+    Element = aggregate_ret_patches(keysort(1, [E || {ret,_,E} <- Patches])),
+    ?DP(" element: ~p~n", [Element]),
+    {V,Extra,Cnt} = patch_literal_term(Lit, Element, Cnt0),
+    {Last#b_ret{arg=V}, Extra, Cnt}.
+
+%% Aggregate patches to a ret instruction to produce a single patch.
+%% A single {self,_} patch is returned as is; one or more
+%% {tuple_element,I,E} patches are combined into one
+%% {tuple_elements,[{I,E},...]} patch.
+aggregate_ret_patches([R={self,heap_tuple}]) ->
+    R;
+aggregate_ret_patches([R={self,init_writable}]) ->
+    R;
+aggregate_ret_patches([{tuple_element,I,E}|Rest]) ->
+    Elements = [{I,E}|aggregate_ret_patches_tuple(Rest)],
+    {tuple_elements,Elements}.
+
+aggregate_ret_patches_tuple([{tuple_element,I,E}|Rest]) ->
+    [{I,E}|aggregate_ret_patches_tuple(Rest)];
+aggregate_ret_patches_tuple([]) ->
+    [].
+
+%% Should return the instructions in reversed order
+%% Returns {Is, Cnt}, where the patched instruction is the first
+%% element of Is.
+patch_opargs(I0=#b_set{args=Args}, Patches0, Cnt0) ->
+    ?DP("Patching args in ~p~n Args: ~p~n Patches: ~p~n",
+        [I0,Args,Patches0]),
+    Patches = merge_arg_patches(Patches0),
+    ?DP(" Merged patches: ~p~n", [Patches]),
+    {PatchedArgs,Is,Cnt} = patch_opargs(Args, Patches, 0, [], [], Cnt0),
+    {[I0#b_set{args=reverse(PatchedArgs)}|Is], Cnt}.
+
+%% Walk the arguments and the patches in parallel; the fourth
+%% argument is the zero-based index of the current argument. An
+%% argument is patched when it is a literal with the same value as
+%% the literal of the first remaining patch and the index of that
+%% patch is the current index. The patched arguments are accumulated
+%% in reverse order. All patches must have been consumed when the
+%% arguments run out.
+patch_opargs([#b_literal{val=Lit}|Args], [{Idx,Lit,Element}|Patches],
+             Idx, PatchedArgs, Is, Cnt0) ->
+    ?DP("Patching arg idx ~p~n lit: ~p~n elem: ~p~n", [Idx,Lit,Element]),
+    {Arg,Extra,Cnt} = patch_literal_term(Lit, Element, Cnt0),
+    patch_opargs(Args, Patches, Idx + 1, [Arg|PatchedArgs], Extra++Is, Cnt);
+patch_opargs([Arg|Args], Patches, Idx, PatchedArgs, Is, Cnt) ->
+    ?DP("Skipping arg idx ~p~n arg: ~p~n patches: ~p~n", [Idx,Arg,Patches]),
+    patch_opargs(Args, Patches, Idx + 1, [Arg|PatchedArgs], Is, Cnt);
+patch_opargs([], [], _, PatchedArgs, Is, Cnt) ->
+    {PatchedArgs, Is, Cnt}.
+
+%% The way find_initial_values work, we can end up with multiple
+%% patches patching different parts of a tuple or pair. We merge them
+%% here.
+%% Only adjacent patches with the same argument index and literal
+%% are merged.
+merge_arg_patches([{Idx,Lit,P0},{Idx,Lit,P1}=Next|Patches]) ->
+    case {P0, P1} of
+        {{tuple_element,I0,E0},{tuple_element,I1,E1}} ->
+            P = {tuple_elements,[{I0,E0},{I1,E1}]},
+            merge_arg_patches([{Idx,Lit,P}|Patches]);
+        {{tuple_elements,Es},{tuple_element,I,E}} ->
+            P = {tuple_elements,[{I,E}|Es]},
+            merge_arg_patches([{Idx,Lit,P}|Patches]);
+        {{self,heap_tuple},_} ->
+            %% P0 forces this argument onto the heap, as P1 patches
+            %% something inside the same tuple, P0 can be dropped.
+            merge_arg_patches([Next|Patches])
+    end;
+merge_arg_patches([P|Patches]) ->
+    [P|merge_arg_patches(Patches)];
+merge_arg_patches([]) ->
+    [].
+
+%% Patch the literal arguments of a phi instruction. Returns
+%% {I, Extra, Cnt}, where Extra is a list of {Lbl,Instructions}
+%% pairs, one for each patched argument, Lbl being the label paired
+%% with the argument in the phi.
+patch_phi(I0=#b_set{op=phi,args=Args0}, Patches, Cnt0) ->
+    %% Map each label to its patch. If several patches name the same
+    %% label, the last one in Patches is the one kept.
+    L2P = foldl(fun(Phi={phi,_,Lbl,_,_}, Acc) ->
+                        Acc#{Lbl => Phi}
+                end, #{}, Patches),
+    {Args, Extra, Cnt} =
+        foldr(fun(Arg0={_,Lbl}, {ArgsAcc,ExtraAcc,CntAcc}) ->
+                      case L2P of
+                          #{Lbl := {phi,_,Lbl,Lit,Element}} ->
+                              {Arg,Extra,Cnt1} =
+                                  patch_literal_term(Lit, Element, CntAcc),
+                              {[{Arg,Lbl}|ArgsAcc],
+                               [{Lbl,Extra}|ExtraAcc], Cnt1};
+                          _ ->
+                              {[Arg0|ArgsAcc], ExtraAcc, CntAcc}
+                      end
+              end, {[],[],Cnt0}, Args0),
+    I = I0#b_set{op=phi,args=Args},
+    {I, Extra, Cnt}.
+
+%% Should return the instructions in reversed order
+%%
+%% Replace the literal term given as the first argument by code
+%% building the parts of it selected by the second argument. Returns
+%% {Value, Instructions, Cnt}, where Value is the variable (or, when
+%% the second argument is [], the unchanged literal) to use instead
+%% of the literal.
+patch_literal_term(Tuple, {tuple_elements,Elems}, Cnt) ->
+    Es = [{tuple_element,I,E} || {I,E} <- keysort(1, Elems)],
+    patch_literal_tuple(Tuple, Es, Cnt);
+patch_literal_term(Tuple, E={tuple_element,_,_}, Cnt) ->
+    patch_literal_tuple(Tuple, [E], Cnt);
+patch_literal_term(Tuple, {self,heap_tuple}, Cnt0) ->
+    %% Build the tuple on the heap.
+    {V,Cnt} = new_var(Cnt0),
+    I = #b_set{op=put_tuple,dst=V,
+               args=[#b_literal{val=E} || E <- tuple_to_list(Tuple)]},
+    {V,[I],Cnt};
+patch_literal_term(<<>>, {self,init_writable}, Cnt0) ->
+    {V,Cnt} = new_var(Cnt0),
+    I = #b_set{op=bs_init_writable,dst=V,args=[#b_literal{val=256}]},
+    {V,[I],Cnt};
+patch_literal_term([H0|T0], {hd,Element}, Cnt0) ->
+    {H,Extra,Cnt1} = patch_literal_term(H0, Element, Cnt0),
+    %% The tail is kept as a literal; the match asserts that no
+    %% instructions are created and that the counter is unchanged.
+    {T,[],Cnt1} = patch_literal_term(T0, [], Cnt1),
+    {Dst,Cnt} = new_var(Cnt1),
+    I = #b_set{op=put_list,dst=Dst,args=[H,T]},
+    {Dst, [I|Extra], Cnt};
+patch_literal_term(Lit, [], Cnt) ->
+    {#b_literal{val=Lit}, [], Cnt}.
+
+%% Replace a literal tuple by a put_tuple in which the elements
+%% selected by Elements0 are patched. Entries in Elements0 which are
+%% not {tuple_element,_,_} tuples are ignored.
+patch_literal_tuple(Tuple, Elements0, Cnt) ->
+    ?DP("Will patch literal tuple~n tuple:~p~n elements: ~p~n",
+        [Tuple,Elements0]),
+    Elements = [ E || {tuple_element,_,_}=E <- Elements0],
+    patch_literal_tuple(erlang:tuple_to_list(Tuple), Elements, [], [], 0, Cnt).
+
+%% Walk the elements of the literal tuple and the patches in
+%% parallel; the fifth argument is the zero-based index of the
+%% current element. An element is patched when its index is the
+%% index of the first remaining patch, otherwise it is kept as a
+%% literal. All patches must have been consumed when the elements
+%% run out. Returns {Var, Instructions, Cnt}, where Var is the
+%% destination of the put_tuple which is the first element of
+%% Instructions.
+patch_literal_tuple([Lit|LitElements], [{tuple_element,Idx,Element}|Elements],
+                    Patched, Extra, Idx, Cnt0) ->
+    ?DP("patch_literal_tuple: idx:~p~n Lit: ~p~n patch: ~p~n",
+        [Idx, Lit, Element]),
+    {V,Exs,Cnt} = patch_literal_term(Lit, Element, Cnt0),
+    patch_literal_tuple(LitElements, Elements, [V|Patched],
+                        Exs ++ Extra, Idx + 1, Cnt);
+patch_literal_tuple([Lit|LitElements], Patches, Patched, Extra, Idx, Cnt) ->
+    ?DP("patch_literal_tuple: skipping idx:~p~n Lit: ~p~n patches: ~p~n",
+        [Idx, Lit, Patches]),
+    {T,[],Cnt} = patch_literal_term(Lit, [], Cnt),
+    patch_literal_tuple(LitElements, Patches, [T|Patched], Extra, Idx + 1, Cnt);
+patch_literal_tuple([], [], Patched, Extra, _, Cnt0) ->
+    {V,Cnt} = new_var(Cnt0),
+    I = #b_set{op=put_tuple,dst=V,args=reverse(Patched)},
+    {V,[I|Extra],Cnt}.
+
+%% Create a variable named after the current value of the counter.
+%% Returns the variable and the incremented counter.
+new_var(Count) ->
+    {#b_var{name=Count},Count+1}.
+
+%% Done with an accumulator to reverse the reversed block order from
+%% patch_f/5.
+%% The instructions registered for a block label in Lbl2Addition are
+%% reversed and appended to the instructions of that block.
+insert_block_additions([Blk0={L,B=#b_blk{is=Is0}}|RevLinear],
+                       Lbl2Addition, Acc) ->
+    Blk = case Lbl2Addition of
+              #{ L := Additions} ->
+                  Is = Is0 ++ reverse(Additions),
+                  {L,B#b_blk{is=Is}};
+              _ ->
+                  Blk0
+          end,
+    insert_block_additions(RevLinear, Lbl2Addition, [Blk|Acc]);
+insert_block_additions([], _, Acc) ->
+    Acc.
diff --git a/lib/compiler/src/beam_ssa_opt.erl b/lib/compiler/src/beam_ssa_opt.erl index c31b18f6e992..4bf26be22d4b 100644 --- a/lib/compiler/src/beam_ssa_opt.erl +++ b/lib/compiler/src/beam_ssa_opt.erl @@ -286,9 +286,9 @@ epilogue_module_passes(Opts) -> fun({StMap, FuncDb}) -> beam_ssa_alias:opt(StMap, FuncDb) end}, - {ssa_opt_private_append, + {ssa_opt_destructive_update, fun({StMap, FuncDb}) -> - beam_ssa_private_append:opt(StMap, FuncDb) + beam_ssa_destructive_update:opt(StMap, FuncDb) end}], passes_1(Ps0, Opts).
diff --git a/lib/compiler/src/beam_ssa_private_append.erl b/lib/compiler/src/beam_ssa_private_append.erl deleted file mode 100644 index fe1a897f03ae..000000000000 --- a/lib/compiler/src/beam_ssa_private_append.erl +++ /dev/null @@ -1,652 +0,0 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2023. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%% -%% %CopyrightEnd% -%% -%% - -%% When a binary is grown by appending data to it using -%% `bs_create_bin`, a considerable performance improvement can be -%% achieved if the append can be done using the destructive -%% `private_append` instead of `append`. Using `private_append` flavor -%% of `bs_create_bin` is only possible when the binary being extended -%% has been created by `bs_writable_binary` or by another -%% `bs_create_bin` using `private_append`. As `private_append` is -%% destructive, an additional requirement is that there is only one -%% live reference to the binary being being extended. - -%% This optimization implements a new SSA optimization pass which -%% finds suitable code sequences which iteratively grow a binaries -%% starting with `<<>>` and rewrites them to start with an initial -%% value created by `bs_writable_binary` and use `private_append` for -%% `bs_create_bin`. - --module(beam_ssa_private_append). --moduledoc false. - --export([opt/2]). - --import(lists, [foldl/3, foldr/3, keysort/2, map/2, reverse/1]). - --include("beam_ssa_opt.hrl"). 
--include("beam_types.hrl"). - -%% -define(DEBUG, true). - --ifdef(DEBUG). --define(DP(FMT, ARGS), io:format(FMT, ARGS)). --define(DP(FMT), io:format(FMT)). --else. --define(DP(FMT, ARGS), skip). --define(DP(FMT), skip). --endif. - --spec opt(st_map(), func_info_db()) -> {st_map(), func_info_db()}. -opt(StMap, FuncDb) -> - %% Ignore functions which are not in the function db (never - %% called). - Funs = [ F || F <- maps:keys(StMap), is_map_key(F, FuncDb)], - private_append(Funs, StMap, FuncDb). - -private_append(Funs, StMap0, FuncDb) -> - Appends = maps:fold(fun(Fun, As, Acc) -> - [{Fun,A} || A <- As] ++ Acc - end, [], find_appends(Funs, StMap0, #{})), - %% We now have to find where we create the binaries in order to - %% patch them. - Defs = find_defs(Appends, StMap0, FuncDb), - StMap = patch_appends(Defs, Appends, StMap0), - {StMap, FuncDb}. - -find_appends([F|Funs], StMap, Found0) -> - #opt_st{ssa=Linear} = map_get(F, StMap), - Found = find_appends_blk(Linear, F, Found0), - find_appends(Funs, StMap, Found); -find_appends([], _, Found) -> - Found. - -find_appends_blk([{_Lbl,#b_blk{is=Is}}|Linear], Fun, Found0) -> - Found = find_appends_is(Is, Fun, Found0), - find_appends_blk(Linear, Fun, Found); -find_appends_blk([], _, Found) -> - Found. - -find_appends_is([#b_set{dst=Dst, op=bs_create_bin, - args=[#b_literal{val=append}, - _, - Lit=#b_literal{val= <<>>}|_]}|Is], - Fun, Found0) -> - %% Special case for when the first fragment is a literal <<>> as - %% it won't be annotated as unique nor will it die with the - %% instruction. 
- AlreadyFound = maps:get(Fun, Found0, []), - Found = Found0#{Fun => [{append,Dst,Lit}|AlreadyFound]}, - find_appends_is(Is, Fun, Found); -find_appends_is([#b_set{dst=Dst, op=bs_create_bin, - args=[#b_literal{val=append},SegmentInfo,Var|_], - anno=#{first_fragment_dies:=Dies}=Anno}|Is], - Fun, Found0) -> - case Dies andalso is_unique(Var, Anno) - andalso is_appendable(Anno, SegmentInfo) of - true -> - AlreadyFound = maps:get(Fun, Found0, []), - Found = Found0#{Fun => [{append,Dst,Var}|AlreadyFound]}, - find_appends_is(Is, Fun, Found); - false -> - find_appends_is(Is, Fun, Found0) - end; -find_appends_is([_|Is], Fun, Found) -> - find_appends_is(Is, Fun, Found); -find_appends_is([], _, Found) -> - Found. - -is_unique(Var, Anno) -> - ordsets:is_element(Var, maps:get(unique, Anno, [])). - -is_appendable(Anno, #b_literal{val=[SegmentUnit|_]}) - when is_integer(SegmentUnit) -> - case Anno of - #{arg_types:=#{2:=#t_bitstring{appendable=true,size_unit=SizeUnit}}} -> - SizeUnit rem SegmentUnit == 0; - _ -> - false - end. - --record(def_st, - { - funcdb, - stmap, - defsdb = #{}, - literals = #{}, - valuesdb = #{} - }). - -find_defs(As, StMap, FuncDb) -> - find_defs_1(As, #def_st{funcdb=FuncDb,stmap=StMap}). 
- -find_defs_1([{Fun,{append,Dst,_Arg}}|Work], DefSt0=#def_st{stmap=StMap}) -> - #{Fun:=#opt_st{ssa=SSA,args=Args}} = StMap, - {DefsInFun,DefSt} = defs_in_fun(Fun, Args, SSA, DefSt0), - ValuesInFun = values_in_fun(Fun, DefSt), - ?DP("*** append in: ~p ***~n", [Fun]), - track_value_in_fun([{Dst,self}], Fun, Work, - DefsInFun, ValuesInFun, DefSt); -find_defs_1([{Fun,{track_call_argument,Callee,Element,Idx}}|Work], - DefSt0=#def_st{stmap=StMap}) -> - #{Fun:=#opt_st{ssa=SSA,args=Args}} = StMap, - ?DP("*** Tracking ~p of the ~p:th argument in call to ~p" - " in the function ~p ***~n", [Element, Idx, Callee, Fun]), - - {DefsInFun,DefSt1} = defs_in_fun(Fun, Args, SSA, DefSt0), - ValuesInFun = values_in_fun(Fun, DefSt1), - {Vars,DefSt} = - get_call_arguments(Callee, Element, Idx, DefsInFun, Fun, DefSt1), - ?DP(" Vars to track: ~p~n", [Vars]), - track_value_in_fun(Vars, Fun, Work, DefsInFun, ValuesInFun, DefSt); -find_defs_1([{Fun,{track_result,Element}}|Work], - DefSt0=#def_st{stmap=StMap}) -> - #{Fun:=#opt_st{ssa=SSA,args=Args}} = StMap, - {DefsInFun,DefSt1} = defs_in_fun(Fun, Args, SSA, DefSt0), - ValuesInFun = values_in_fun(Fun, DefSt0), - ?DP("*** Tracking ~p of the result of ~p ***~n", - [Fun, Element]), - {Results,DefSt} = get_results(SSA, Element, Fun, DefSt1), - ?DP("values to track inside the function: ~p~n", [Results]), - track_value_in_fun(Results, Fun, Work, DefsInFun, ValuesInFun, DefSt); - -find_defs_1([], DefSt) -> - DefSt#def_st.literals. - -%% Find all variables which are returned and return them in a worklist -get_results(SSA, Element, Fun, DefSt) -> - get_results(SSA, [], Element, Fun, DefSt). 
- -get_results([{_,#b_blk{last=#b_ret{arg=#b_var{}=V}}}|Rest], - Acc, Element, Fun, DefSt) -> - get_results(Rest, [{V,Element}|Acc], Element, Fun, DefSt); -get_results([{Lbl,#b_blk{last=#b_ret{arg=#b_literal{val=Lit}}}}|Rest], - Acc, Element, Fun, DefSt0) -> - %% As tracking doesn't make any attempt to use type information to - %% exclude execution paths not relevant when tracking an - %% appendable binary, it can happen that we encounter literals - %% which do not match the type of the element. We can safely stop - %% the tracking in that case. - Continue = case Element of - {tuple_element,_,_} -> - is_tuple(Lit); - self -> - is_bitstring(Lit); - {hd,_} -> - is_list(Lit) andalso (Lit =/= []) - end, - DefSt = if Continue -> - add_literal(Fun, {ret,Lbl,Element}, DefSt0); - true -> - DefSt0 - end, - get_results(Rest, Acc, Element, Fun, DefSt); -get_results([_|Rest], Acc, Element, Fun, DefSt) -> - get_results(Rest, Acc, Element, Fun, DefSt); -get_results([], Acc, _, _Fun, DefSt) -> - {Acc, DefSt}. - -track_value_in_fun([{#b_var{}=V,Element}|Rest], Fun, Work, - Defs, ValuesInFun, DefSt0) - when is_map_key({V,Element}, ValuesInFun) -> - %% We have already explored this value. 
- ?DP("We have already explored ~p of ~p in ~p~n", [Element, V, Fun]), - track_value_in_fun(Rest, Fun, Work, Defs, ValuesInFun, DefSt0); -track_value_in_fun([{#b_var{}=V,Element}|Rest], Fun, Work0, Defs, - ValuesInFun0, DefSt0=#def_st{}) -> - ?DP("Tracking ~p of ~p in fun ~p~n", [Element, V, Fun]), - ValuesInFun = ValuesInFun0#{{V,Element}=>visited}, - case Defs of - #{V:=#b_set{dst=V,op=Op,args=Args}} -> - case {Op,Args,Element} of - {bs_create_bin,[#b_literal{val=append},_,Arg|_],self} -> - track_value_in_fun([{Arg,self}|Rest], Fun, Work0, - Defs, ValuesInFun, DefSt0); - {bs_create_bin,[#b_literal{val=private_append},_,_|_],self} -> - %% If the code has already been rewritten to use - %% private_append, tracking the accumulator to - %% ensure that it is is writable has already - %% been seen to, so no need to track it. - track_value_in_fun(Rest, Fun, Work0, - Defs, ValuesInFun, DefSt0); - {bs_init_writable,_,self} -> - %% bs_init_writable creates a writable binary, so - %% we are done. - track_value_in_fun(Rest, Fun, Work0, - Defs, ValuesInFun, DefSt0); - {call,[#b_local{}=Callee|_Args],_} -> - track_value_into_call(Callee, Element, Fun, Rest, Work0, - Defs, ValuesInFun, DefSt0); - {call,[#b_remote{mod=#b_literal{val=erlang}, - name=#b_literal{val=error}, - arity=1}|_Args],_} -> - %% As erlang:error/1 never returns, we shouldn't - %% try to track this value. - track_value_in_fun(Rest, Fun, Work0, - Defs, ValuesInFun, DefSt0); - {get_hd,[List],_} -> - %% We only handle the case when the tracked value - %% is in the head field of a cons. This is due to - %% the type analyser always assuming that a cons - %% is part of a list and therefore we will never - %% be able to safely rewrite an accumulator in the - %% tail field of the cons, thus we will never - %% have to track it. 
- track_value_in_fun( - [{List,{hd,Element}}|Rest], Fun, Work0, - Defs, ValuesInFun, DefSt0); - {get_tuple_element,[#b_var{}=Tuple,#b_literal{val=Idx}],_} -> - track_value_in_fun( - [{Tuple,{tuple_element,Idx,Element}}|Rest], Fun, Work0, - Defs, ValuesInFun, DefSt0); - {phi,_,_} -> - {ToExplore,DefSt} = handle_phi(Fun, V, Args, - Element, DefSt0), - track_value_in_fun(ToExplore ++ Rest, Fun, Work0, - Defs, ValuesInFun, DefSt); - {put_tuple,_,_} when Element =/= self -> - track_put_tuple(Args, Element, Rest, Fun, V, Work0, - Defs, ValuesInFun, DefSt0); - {put_list,_,_} when Element =/= self -> - track_put_list(Args, Element, Rest, Fun, V, Work0, - Defs, ValuesInFun, DefSt0); - {_,_,_} -> - %% Above we have handled all operations through - %% which we are able to track the value to its - %% construction. All other operations are from - %% execution paths not reachable when the actual - %% type (at runtime) is a relevant bitstring. - %% Thus we can safely abort the tracking here. - track_value_in_fun(Rest, Fun, Work0, - Defs, ValuesInFun, DefSt0) - end; - #{V:={arg,Idx}} -> - track_value_into_caller(Element, Idx, Rest, Fun, Work0, Defs, - ValuesInFun, DefSt0) - end; -track_value_in_fun([{#b_literal{},_}|Rest], Fun, Work, - Defs, ValuesInFun, DefSt) -> - track_value_in_fun(Rest, Fun, Work, Defs, ValuesInFun, DefSt); -track_value_in_fun([], Fun, Work, _Defs, ValuesInFun, - DefSt0=#def_st{valuesdb=ValuesDb0}) -> - %% We are done with this function. Store the result in the - %% valuesdb and continue with the work list. - DefSt = DefSt0#def_st{valuesdb=ValuesDb0#{Fun=>ValuesInFun}}, - find_defs_1(Work, DefSt). - -track_value_into_call(Callee, Element, CallerFun, CallerWork, GlobalWork0, - CallerDefs, CallerValuesInFun, DefSt0) -> - GlobalWork = [{Callee, {track_result, Element}}|GlobalWork0], - track_value_in_fun(CallerWork, CallerFun, GlobalWork, - CallerDefs, CallerValuesInFun, DefSt0). 
- -track_value_into_caller(Element, ArgIdx, - CalledFunWorklist, CalledFun, - GlobalWorklist0, - CalledFunDefs, CalledFunValues, - DefSt0=#def_st{funcdb=FuncDb,stmap=StMap}) -> - #func_info{in=Callers} = map_get(CalledFun, FuncDb), - ?DP("Track into callers of ~p, tracking arg-idx:~p, ~p~n callers:~p~n", - [CalledFun, ArgIdx, Element, Callers]), - %% The caller information in func_info does not remove a caller - %% when it is inlined into another function (although the new - %% caller is added), so we have to filter out functions which lack - %% entries in the st_map (as they are dead, they have been removed - %% from the stmap). - Work = [ {Caller,{track_call_argument,CalledFun,Element,ArgIdx}} - || Caller <- Callers, is_map_key(Caller, StMap)], - GlobalWorklist = Work ++ GlobalWorklist0, - track_value_in_fun(CalledFunWorklist, CalledFun, GlobalWorklist, - CalledFunDefs, CalledFunValues, DefSt0). - -track_put_tuple(FieldVars, {tuple_element,Idx,Element}, - Work, Fun, Dst, GlobalWork, - Defs, ValuesInFun, DefSt0) -> - %% The value we are tracking was constructed by a put tuple and we - %% are interested in continuing the tracking of the field - case lists:nth(Idx + 1, FieldVars) of - ToTrack = #b_var{} -> - track_value_in_fun([{ToTrack,Element}|Work], Fun, GlobalWork, - Defs, ValuesInFun, DefSt0); - #b_literal{val=Lit} -> - DefSt = add_literal(Fun, {opargs,Dst,Idx,Lit,Element}, DefSt0), - track_value_in_fun(Work, Fun, GlobalWork, - Defs, ValuesInFun, DefSt) - end; -track_put_tuple(_FieldVars, {hd,_}, - Work, Fun, _Dst, GlobalWork, - Defs, ValuesInFun, DefSt) -> - track_value_in_fun(Work, Fun, GlobalWork, - Defs, ValuesInFun, DefSt). - -track_put_list([Hd,_Tl], {hd,Element}, - Work, Fun, Dst, GlobalWork, - Defs, ValuesInFun, DefSt0) -> - %% The value we are tracking was constructed by a put list and we - %% are interested in continuing the tracking of the field. We only - %% handle the case when the tracked value is in the head field of - %% a cons. 
This is due to the type analyser always assuming that a - %% cons is part of a list and therefore we will never be able to - %% safely rewrite an accumulator in the tail field of the cons, - %% thus we will never have to track it. - case Hd of - #b_var{} -> - track_value_in_fun([{Hd,Element}|Work], Fun, GlobalWork, - Defs, ValuesInFun, DefSt0); - #b_literal{val=Lit} -> - DefSt = add_literal(Fun, {opargs,Dst,0,Lit,Element}, DefSt0), - track_value_in_fun(Work, Fun, GlobalWork, Defs, ValuesInFun, DefSt) - end; -track_put_list([_Hd,_Tl], {tuple_element,_,_}, Work, Fun, _Dst, GlobalWork, - Defs, ValuesInFun, DefSt) -> - track_value_in_fun(Work, Fun, GlobalWork, Defs, ValuesInFun, DefSt). - -%% Find all calls to Callee and produce a work-list containing all -%% values which are used as the Idx:th argument. -get_call_arguments(Callee, Element, Idx, Defs, Fun, DefSt0) -> - %% We traverse all defs inside the caller to find the calls. - maps:fold(fun(_, #b_set{dst=Dst,op=call,args=[Target|Args]}, {Acc,DefSt}) - when Callee =:= Target -> - {Values,DefSt1} = - gca(Args, Element, Idx, Fun, Dst, DefSt), - {Values ++ Acc, DefSt1}; - (_, _, Acc) -> - Acc - end, {[], DefSt0}, Defs). - -gca(Args, Element, Idx, Fun, Dst, DefSt) -> - gca(Args, 0, Element, Idx, Fun, Dst, DefSt). - -gca([#b_var{}=V|_], I, Element, I, _Fun, _Dst, DefSt) -> - %% This is the argument we are tracking. - {[{V,Element}], DefSt}; -gca([#b_literal{val=Lit}|_], I, self, I, _Fun, _Dst, DefSt) - when not is_bitstring(Lit)-> - %% As value tracking is done without type information, we can - %% follow def chains which don't terminate in a bitstring. This is - %% harmless, but we should ignore them and not, later on, try to - %% patch them to a bs_writable_binary. - {[], DefSt}; -gca([#b_literal{val=Lit}|_], I, Element, I, Fun, Dst, DefSt) -> - {[], add_literal(Fun, {opargs,Dst,I+1,Lit,Element}, DefSt)}; -gca([_|Args], I, Element, Idx, Fun, Dst, DefSt) -> - gca(Args, I + 1, Element, Idx, Fun, Dst, DefSt). 
- -handle_phi(Fun, Dst, Args, Element, DefSt0) -> - foldl(fun({#b_literal{val=Lit},Lbl}, {Acc,DefStAcc0}) -> - DefStAcc = - add_literal(Fun, {phi,Dst,Lbl,Lit,Element}, DefStAcc0), - {Acc, DefStAcc}; - ({V=#b_var{},_Lbl}, {Acc,DefStAcc}) -> - {[{V,Element}|Acc],DefStAcc} - end, {[],DefSt0}, Args). - -%% Cache calculation of the defs for a function so we only do it once. -defs_in_fun(Fun, Args, SSA, DefSt=#def_st{defsdb=DefsDb}) -> - case DefsDb of - #{Fun:=Defs} -> - {Defs, DefSt}; - #{} -> - BlockMap = maps:from_list(SSA), - Labels = maps:keys(BlockMap), - Defs0 = beam_ssa:definitions(Labels, BlockMap), - {Defs,_} = foldl(fun(Arg, {Acc,Idx}) -> - {Acc#{Arg => {arg,Idx}}, Idx + 1} - end, {Defs0,0}, Args), - {Defs, DefSt#def_st{defsdb=DefsDb#{Fun=>Defs}}} - end. - - -%% Look up what we know about the values in Fun. -values_in_fun(Fun, #def_st{valuesdb=ValuesDb}) -> - maps:get(Fun, ValuesDb, #{}). - -add_literal(Fun, LitInfo, DefSt=#def_st{literals=Ls}) -> - Old = maps:get(Fun, Ls, []), - DefSt#def_st{literals=Ls#{Fun => [LitInfo|Old]}}. - -patch_appends(Bins, Appends, StMap0) -> - ?DP("Appends:~n~p~n", [Appends]), - ?DP("Bins:~n~p~n", [Bins]), - - %% Group by function - Patches = foldl(fun({Fun,Append}, Acc) -> - Acc#{Fun => [Append|maps:get(Fun, Acc, [])] } - end, Bins, Appends), - ?DP("Patches:~n~p~n", [Patches]), - maps:fold(fun(Fun, Ps, StMapAcc) -> - OptSt=#opt_st{ssa=SSA0,cnt=Cnt0} = - map_get(Fun, StMapAcc), - {SSA,Cnt} = patch_appends_f(SSA0, Cnt0, Ps), - StMapAcc#{Fun => OptSt#opt_st{ssa=SSA,cnt=Cnt}} - end, StMap0, Patches). - -patch_appends_f(SSA0, Cnt0, Patches) -> - ?DP("Will patch ~p~n", [Patches]), - ?DP("SSA: ~p~n", [SSA0]), - %% Group by PD - PD = foldl(fun(P, Acc) -> - case P of - {opargs,Dst,_,_,_} -> ok; - {append,Dst,_} -> ok; - {phi,Dst,_,_,_} -> ok; - {ret,Dst,_} -> ok - end, - Set = ordsets:add_element(P, maps:get(Dst, Acc, [])), - Acc#{Dst => Set} - end, #{}, Patches), - ?DP("PD: ~p~n", [PD]), - patch_appends_f(SSA0, Cnt0, PD, [], []). 
- -patch_appends_f([{Lbl,Blk=#b_blk{is=Is0,last=Last0}}|Rest], - Cnt0, PD0, Acc0, BlockAdditions0) -> - {Last,Extra,Cnt2,PD} = - case PD0 of - #{ Lbl := Patches } -> - {Last1,Extra0,Cnt1} = patch_appends_ret(Last0, Patches, Cnt0), - {Last1, reverse(Extra0), Cnt1, maps:remove(Lbl, PD0)}; - #{} -> - {Last0, [], Cnt0, PD0} - end, - {Is, Cnt, BlockAdditions} = patch_appends_is(Is0, PD, Cnt2, [], []), - Acc = [{Lbl,Blk#b_blk{is=Is ++ Extra, last=Last}}|Acc0], - patch_appends_f(Rest, Cnt, PD, Acc, BlockAdditions ++ BlockAdditions0 ); -patch_appends_f([], Cnt, _PD, Acc, BlockAdditions) -> - ?DP("BlockAdditions: ~p~n", [BlockAdditions]), - Linear = insert_block_additions(Acc, maps:from_list(BlockAdditions), []), - ?DP("SSA-result:~n~p~n", [Linear]), - {Linear, Cnt}. - -patch_appends_is([I0=#b_set{dst=Dst}|Rest], PD0, Cnt0, Acc, BlockAdditions0) - when is_map_key(Dst, PD0) -> - #{ Dst := Patches } = PD0, - PD = maps:remove(Dst, PD0), - ExtractOpargs = fun({opargs,D,Idx,Lit,Element}) when Dst =:= D -> - {Idx, Lit, Element} - end, - case Patches of - [{opargs,Dst,_,_,_}|_] -> - Ps = keysort(1, map(ExtractOpargs, Patches)), - {Is, Cnt} = patch_opargs(I0, Ps, Cnt0), - patch_appends_is(Rest, PD, Cnt, Is++Acc, BlockAdditions0); - [{append,Dst,#b_literal{val= <<>>}=Lit}] -> - %% Special case for when the first fragment is a literal - %% <<>> and it has to be replaced with a bs_init_writable. 
- #b_set{op=bs_create_bin,dst=Dst,args=Args0}=I0, - [#b_literal{val=append},SegInfo,Lit|OtherArgs] = Args0, - {V,Cnt} = new_var(Cnt0), - Init = #b_set{op=bs_init_writable,dst=V,args=[#b_literal{val=256}]}, - I = I0#b_set{args=[#b_literal{val=private_append}, - SegInfo,V|OtherArgs]}, - patch_appends_is(Rest, PD, Cnt, [I,Init|Acc], BlockAdditions0); - [{append,Dst,_}] -> - #b_set{op=bs_create_bin,dst=Dst,args=Args0}=I0, - [#b_literal{val=append}|OtherArgs] = Args0, - I = I0#b_set{args=[#b_literal{val=private_append}|OtherArgs]}, - patch_appends_is(Rest, PD, Cnt0, [I|Acc], BlockAdditions0); - [{phi,Dst,_,_,_}|_] -> - {I, Extra, Cnt} = patch_phi(I0, Patches, Cnt0), - patch_appends_is(Rest, PD, Cnt, [I|Acc], Extra ++ BlockAdditions0) - end; -patch_appends_is([I|Rest], PD, Cnt, Acc, BlockAdditions) -> - patch_appends_is(Rest, PD, Cnt, [I|Acc], BlockAdditions); -patch_appends_is([], _, Cnt, Acc, BlockAdditions) -> - {reverse(Acc), Cnt, BlockAdditions}. - -%% The only time when we patch a return is when it returns a -%% literal. -patch_appends_ret(Last=#b_ret{arg=#b_literal{val=Lit}}, Patches, Cnt0) - when is_list(Lit); is_tuple(Lit) -> - Ps = keysort(1, [E || {ret,_,E} <- Patches]), - ?DP("patch_appends_ret tuple or list :~n lit: ~p~n patches: ~p~n", [Lit, Ps]), - {V,Extra,Cnt} = patch_literal_term(Lit, Ps, Cnt0), - {Last#b_ret{arg=V}, Extra, Cnt}; -patch_appends_ret(Last=#b_ret{arg=#b_literal{val=Lit}}, - [{ret,_,Element}], - Cnt0) -> - ?DP("patch_appends_ret other:~n lit: ~p~n element: ~p~n", [Lit, Element]), - {V,Extra,Cnt} = patch_literal_term(Lit, Element, Cnt0), - {Last#b_ret{arg=V}, Extra, Cnt}. - -%% Should return the instructions in reversed order -patch_opargs(I0=#b_set{args=Args}, Patches0, Cnt0) -> - ?DP("Patching args in ~p~nArgs: ~p~n Patches: ~p~n", - [I0,Args,Patches0]), - Patches = merge_arg_patches(Patches0), - {PatchedArgs, Is, Cnt} = patch_opargs(Args, Patches, 0, [], [], Cnt0), - {[I0#b_set{args=reverse(PatchedArgs)}|Is], Cnt}. 
- -patch_opargs([#b_literal{val=Lit}|Args], [{Idx,Lit,Element}|Patches], - Idx, PatchedArgs, Is, Cnt0) -> - ?DP("Patching arg idx ~p~n lit: ~p~n elem: ~p~n", [Idx, Lit, Element]), - {Arg,Extra,Cnt} = patch_literal_term(Lit, Element, Cnt0), - patch_opargs(Args, Patches, Idx + 1, [Arg|PatchedArgs], Extra ++ Is, Cnt); -patch_opargs([Arg|Args], Patches, Idx, PatchedArgs, Is, Cnt) -> - ?DP("Skipping arg idx ~p~n arg: ~p~n patches: ~p~n", - [Idx,Arg,Patches]), - patch_opargs(Args, Patches, Idx + 1, [Arg|PatchedArgs], Is, Cnt); -patch_opargs([], [], _, PatchedArgs, Is, Cnt) -> - {PatchedArgs, Is, Cnt}. - -%% The way find_defs and find_appends work, we can end up with -%% multiple patches patching different parts of a tuple or pair. We -%% merge them here. -merge_arg_patches([{Idx,Lit,P0},{Idx,Lit,P1}|Patches]) -> - P = case {P0, P1} of - {{tuple_element,I0,E0},{tuple_element,I1,E1}} -> - {tuple_elements,[{I0,E0},{I1,E1}]}; - {{tuple_elements,Es},{tuple_element,I,E}} -> - {tuple_elements,[{I,E}|Es]}; - {_,_} -> - [P0|merge_arg_patches([P1|Patches])] - end, - merge_arg_patches([{Idx,Lit,P}|Patches]); -merge_arg_patches([P|Patches]) -> - [P|merge_arg_patches(Patches)]; -merge_arg_patches([]) -> - []. - -patch_phi(I0=#b_set{op=phi,args=Args0}, Patches, Cnt0) -> - L2P = foldl(fun(Phi={phi,_,Lbl,_,_}, Acc) -> - Acc#{Lbl => Phi} - end, #{}, Patches), - {Args, Extra, Cnt} = - foldr(fun(Arg0={_, Lbl}, {ArgsAcc, ExtraAcc, CntAcc}) -> - case L2P of - #{Lbl := {phi,_,Lbl,Lit,Element}} -> - {Arg,Extra,Cnt1} = - patch_literal_term(Lit, Element, CntAcc), - {[{Arg,Lbl}|ArgsAcc], - [{Lbl, Extra}|ExtraAcc], Cnt1}; - _ -> - {[Arg0|ArgsAcc], ExtraAcc, CntAcc} - end - end, {[], [], Cnt0}, Args0), - I = I0#b_set{op=phi,args=Args}, - {I, Extra, Cnt}. 
- -%% Should return the instructions in reversed order -patch_literal_term(Tuple, {tuple_elements,Elems}, Cnt) -> - Es = [{tuple_element,I,E} || {I,E} <- keysort(1, Elems)], - patch_literal_tuple(Tuple, Es, Cnt); -patch_literal_term(Tuple, Elements0, Cnt) when is_tuple(Tuple) -> - Elements = if is_list(Elements0) -> Elements0; - true -> [Elements0] - end, - patch_literal_tuple(Tuple, Elements, Cnt); -patch_literal_term(<<>>, self, Cnt0) -> - {V,Cnt} = new_var(Cnt0), - I = #b_set{op=bs_init_writable,dst=V,args=[#b_literal{val=256}]}, - {V, [I], Cnt}; -patch_literal_term(Lit, self, Cnt) -> - {#b_literal{val=Lit}, [], Cnt}; -patch_literal_term([H0|T0], {hd,Element}, Cnt0) -> - {H,Extra,Cnt1} = patch_literal_term(H0, Element, Cnt0), - {T,[],Cnt1} = patch_literal_term(T0, [], Cnt1), - {Dst,Cnt} = new_var(Cnt1), - I = #b_set{op=put_list,dst=Dst,args=[H,T]}, - {Dst, [I|Extra], Cnt}; -patch_literal_term([_|_]=Pair, Elems, Cnt) when is_list(Elems) -> - [Elem] = [E || {hd,_}=E <- Elems], - patch_literal_term(Pair, Elem, Cnt); -patch_literal_term(Lit, [], Cnt) -> - {#b_literal{val=Lit}, [], Cnt}. - -patch_literal_tuple(Tuple, Elements0, Cnt) -> - ?DP("Will patch literal tuple~n tuple:~p~n elements: ~p~n", - [Tuple,Elements0]), - Elements = [ E || {tuple_element,_,_}=E <- Elements0], - patch_literal_tuple(erlang:tuple_to_list(Tuple), Elements, [], [], 0, Cnt). 
- -patch_literal_tuple([Lit|LitElements], [{tuple_element,Idx,Element}|Elements], - Patched, Extra, Idx, Cnt0) -> - ?DP("patch_literal_tuple: idx:~p~n Lit: ~p~n patch: ~p~n", - [Idx, Lit, Element]), - {V,Exs,Cnt} = patch_literal_term(Lit, Element, Cnt0), - patch_literal_tuple(LitElements, Elements, [V|Patched], - Exs ++ Extra, Idx + 1, Cnt); -patch_literal_tuple([Lit|LitElements], Patches, Patched, Extra, Idx, Cnt) -> - ?DP("patch_literal_tuple: skipping idx:~p~n Lit: ~p~n patches: ~p~n", [Idx, Lit, Patches]), - {T,[],Cnt} = patch_literal_term(Lit, [], Cnt), - patch_literal_tuple(LitElements, Patches, [T|Patched], - Extra, Idx + 1, Cnt); -patch_literal_tuple([], [], Patched, Extra, _, Cnt0) -> - {V,Cnt} = new_var(Cnt0), - I = #b_set{op=put_tuple,dst=V,args=reverse(Patched)}, - {V, [I|Extra], Cnt}. - -new_var(Count) -> - {#b_var{name=Count},Count+1}. - -%% Done with an accumulator to reverse the reversed block order from -%% patch_appends_f/5. -insert_block_additions([Blk0={L,B=#b_blk{is=Is0}}|RevLinear], - Lbl2Addition, Acc) -> - Blk = case Lbl2Addition of - #{ L := Additions} -> - Is = Is0 ++ reverse(Additions), - {L,B#b_blk{is=Is}}; - _ -> - Blk0 - end, - insert_block_additions(RevLinear, Lbl2Addition, [Blk|Acc]); -insert_block_additions([], _, Acc) -> - Acc. diff --git a/lib/compiler/src/beam_ssa_ss.erl b/lib/compiler/src/beam_ssa_ss.erl new file mode 100644 index 000000000000..1836d771034f --- /dev/null +++ b/lib/compiler/src/beam_ssa_ss.erl @@ -0,0 +1,848 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2024. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. 
+%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% + +%% +%% This module provides a database storing the sharing status for the +%% variables in a function. The database is represented as a graph +%% (beam_digraph) where the nodes are variables and values. Edges are +%% extraction and embedding operations. Nodes are labeled with the +%% sharing status of the variable and edges are labeled with the type +%% of extraction/embedding operation. +%% + +-module(beam_ssa_ss). + +-compile({inline,[add_edge/4, add_vertex/3]}). + +-export([add_var/3, + derive_from/3, + embed_in/3, + extract/4, + get_status/2, + initialize_in_args/1, + meet_in_args/1, + merge/2, + merge_in_args/3, + new/0, + new/3, + prune/2, + set_call_result/4, + set_status/3, + variables/1]). + +-include("beam_ssa.hrl"). +-include("beam_types.hrl"). + +-import(lists, [foldl/3]). + +-define(ARGS_DEPTH_LIMIT, 4). + +%% -define(DEBUG, true). + +-ifdef(DEBUG). +-define(DP(FMT, ARGS), io:format(FMT, ARGS)). +-define(DP(FMT), io:format(FMT)). +-define(DBG(STMT), STMT). +-else. +-define(DP(FMT, ARGS), skip). +-define(DP(FMT), skip). +-define(DBG(STMT), skip). +-endif. + + +%% Uncomment the following to check that all invariants for the state +%% hold. These checks are expensive and not enabled by default. + +%% -define(EXTRA_ASSERTS, true). + +-ifdef(EXTRA_ASSERTS). +-define(assert_state(State), assert_state(State)). +-define(ASSERT(Assert), Assert). +-else. +-define(assert_state(State), State). +-define(ASSERT(Assert), skip). +-endif. + +-type sharing_state() :: any(). % A beam_digraph graph. 
+-type sharing_status() :: 'unique' | 'aliased'. +-type element() :: 'hd' | 'tl' | non_neg_integer(). + +-spec add_var(beam_ssa:b_var(), sharing_status(), sharing_state()) -> + sharing_state(). +add_var(V, Status, State) -> + ?DP("Adding variable ~p as ~p~n", [V,Status]), + ?assert_state(add_vertex(State, V, Status)). + +add_vertex(State, V, Status) -> + ?ASSERT(case V of + #b_var{} -> ok; + plain -> ok; + #b_literal{} -> ok + end), + beam_digraph:add_vertex(State, V, Status). + +add_edge(State, Src, Dst, Lbl) -> + ?ASSERT(begin + case Src of + #b_var{} -> ok; + plain -> ok; + #b_literal{} -> ok + end, + #b_var{} = Dst, + case Lbl of + {extract,Element} when is_integer(Element); + Element =:= hd ; Element =:= tl -> + ok; + embed -> + ok; + {embed,Element} when is_integer(Element); + Element =:= hd ; Element =:= tl -> + ok + end + end), + beam_digraph:add_edge(State, Src, Dst, Lbl). + +-spec derive_from(beam_ssa:b_var(), beam_ssa:b_var(), sharing_state()) -> + sharing_state(). +derive_from(Dst, Src, State) -> + ?DP("Deriving ~p from ~p~nSS:~p~n", [Dst,Src,State]), + ?assert_state(State), + ?ASSERT(assert_variable_exists(Dst, State)), + ?ASSERT(assert_variable_exists(Src, State)), + case {beam_digraph:vertex(State, Dst),beam_digraph:vertex(State, Src)} of + {aliased,_} -> + %% Nothing to do, already aliased. This can happen when + %% handling Phis, no propagation to the source should be + %% done. + State; + {_,aliased} -> + %% The source is aliased, the destination will become aliased. + ?assert_state(set_status(Dst, aliased, State)); + {_,_} -> + case has_out_edges(Src, State) of + true -> + %% The source has already been embedded in a term, + %% both destination and source will be aliased. + ?assert_state(set_status(Src, aliased, + set_status(Dst, aliased, State))); + false -> + %% Source is not aliased and has not been embedded + %% in a term, record that it now is. + ?assert_state(add_edge(State, Src, Dst, embed)) + end + end. 
+ +-spec embed_in(beam_ssa:b_var(), [{element(),beam_ssa:b_var()}], + sharing_state()) -> sharing_state(). +embed_in(Dst, Elements, State0) -> + ?DP("Embedding ~p into ~p~nSS:~p~n", [Elements,Dst,State0]), + ?assert_state(State0), + ?ASSERT(assert_variable_exists(Dst, State0)), + ?ASSERT([assert_variable_exists(Src, State0) + || {#b_var{},Src} <- Elements]), + foldl(fun({Element,Src}, Acc) -> + add_embedding(Dst, Src, Element, Acc) + end, State0, Elements). + +add_embedding(Dst, Src, Element, State0) -> + ?DP("add_embedding(~p, ~p, ~p, ...)~n", [Dst,Src,Element]), + + %% Create a node for literals as it isn't in the graph. + State1 = case Src of + plain -> + beam_digraph:add_vertex(State0, Src, unique); + #b_literal{} -> + beam_digraph:add_vertex(State0, Src, unique); + _ -> + State0 + end, + + %% Create the edge, this is done regardless of the aliasing status + %% as it is how the status of an element can be looked up. + ?ASSERT(case Element of + hd -> ok; + tl -> ok; + E when is_integer(E), E >= 0 -> ok + end), + State = ?assert_state(add_edge(State1, Src, Dst, {embed,Element})), + + %% If the variable being embedded ends up with more than one + %% out-edge, the source will be aliased, unless it is a plain value. + case beam_digraph:out_edges(State, Src) of + [_] -> + State; + _ when Src =/= plain -> + case Src of + #b_literal{} -> + State; + _ -> + set_status(Src, aliased, State) + end; + _ when Src =:= plain -> + State + end. + +-spec extract(beam_ssa:b_var(), beam_ssa:b_var(), element(), + sharing_state()) -> sharing_state(). +extract(Dst, Src, Element, State) -> + ?DP("Extracting ~p[~p] into ~p~n", [Src,Element,Dst]), + ?assert_state(State), + ?ASSERT(assert_variable_exists(Dst, State)), + ?ASSERT(assert_variable_exists(Src, State)), + + case beam_digraph:vertex(State, Src) of + aliased -> + %% The pair/tuple is aliased, so what is extracted will be aliased. 
+ ?assert_state(set_status(Dst, aliased, State)); + unique -> + %% The aggregate is unique, but elements can be aliased. + OutEdges = beam_digraph:out_edges(State, Src), + ?ASSERT(true = is_integer(Element) + orelse (Element =:= hd) orelse (Element =:= tl)), + extract_element(Dst, Src, Element, OutEdges, State) + end. + +%% Note that extract_element/5 will never be given an out edge with a +%% plain 'embed' label as the source would have been aliased if it was +%% live past the point where it was embedded. +extract_element(Dst, _Src, Element, [{_,Old,{extract,Element}}|_], State) -> + %% This element is already extracted, the old extracted value + %% becomes aliased as well as Dst. + ?assert_state(set_status(Old, aliased, set_status(Dst, aliased, State))); +extract_element(Dst, Src, Element, [{_,_,{extract,_}}|Rest], State) -> + %% This is a recorded extraction of another element. + extract_element(Dst, Src, Element, Rest, State); +extract_element(Dst, Src, Element, [], State0) -> + %% This element has not been extracted before and Src is not + %% aliased (checked in extract/4). It could be that we're about to + %% extract an element which is known to be aliased. + ?DP(" the element has not been extracted so far~n"), + State = ?assert_state(add_edge(State0, Src, Dst, {extract,Element})), + extract_status_for_element(Element, Src, Dst, State). + +extract_status_for_element(Element, Src, Dst, State) -> + ?DP(" extract_status_for_element(~p, ~p)~n", [Element, Src]), + InEdges = beam_digraph:in_edges(State, Src), + extract_status_for_element(InEdges, Element, Src, Dst, State). 
+ +extract_status_for_element([{N,_,{embed,Element}}|_InEdges], + Element, _Src, Dst, State0) -> + ?DP(" found new source ~p~n", [N]), + ?DP(" SS ~p~n", [State0]), + ?DP(" status ~p~n", [beam_digraph:vertex(State0, N)]), + State = set_status(Dst, beam_digraph:vertex(State0, N), State0), + ?DP(" Returned SS ~p~n", [State]), + ?assert_state(State); +extract_status_for_element([{N,_,{extract,SrcElement}}|InEdges], + Element, Src, Dst, State0) -> + ?DP(" found source: ~p[~p]~n", [N,SrcElement]), + Origs = [Var || {Var,_,{embed,SE}} <- beam_digraph:in_edges(State0, N), + SrcElement =:= SE], + ?DP(" original var: ~p~n", [Origs]), + case Origs of + [] -> + ?DP(" no known source~n"), + extract_status_for_element(InEdges, Element, Src, Dst, State0); + [Orig] -> + extract_status_for_element(Element, Orig, Dst, State0) + end; +extract_status_for_element([_Edge|InEdges], Element, Src, Dst, State) -> + ?DP(" ignoring in-edge ~p~n", [_Edge]), + extract_status_for_element(InEdges, Element, Src, Dst, State); +extract_status_for_element([], _Element, _Src, Dst, State0) -> + %% Nothing found, the status will be aliased. + %Status = beam_digraph:vertex(State0, Src), + Status = aliased, + ?DP(" status of ~p will be ~p~n", [Dst, Status]), + State = set_status(Dst, Status, State0), + ?assert_state(State). + +-spec get_status(beam_ssa:b_var(), sharing_state()) -> + sharing_status(). +get_status(V=#b_var{}, State) -> + beam_digraph:vertex(State, V). + +-spec merge(sharing_state(), sharing_state()) -> sharing_state(). +merge(StateA, StateB) -> + ?DP("Merging states~n"), + ?assert_state(StateA), + ?assert_state(StateB), + SizeA = beam_digraph:no_vertices(StateA), + SizeB = beam_digraph:no_vertices(StateB), + %% Always merge the smaller state into the larger. 
+ {Small,Large} = if SizeA < SizeB -> + {StateA,StateB}; + true -> + {StateB,StateA} + end, + ?DP("Merging Small into Large~nLarge:~n"), + ?DP("Small:~n"), + ?DBG(dump(Small)), + ?DP("Large:~n"), + ?DBG(dump(Large)), + R = merge(Large, Small, beam_digraph:vertices(Small), + sets:new([{version,2}]), sets:new([{version,2}])), + ?assert_state(R). + +merge(Dest, Source, [{V,VStatus}|Vertices], Edges0, Forced) -> + + Edges = accumulate_edges(V, Source, Edges0), + DestStatus = case beam_digraph:has_vertex(Dest, V) of + true -> + beam_digraph:vertex(Dest, V); + false -> + false + end, + case {DestStatus,VStatus} of + {Status,Status} -> + %% Same status in both states. + merge(Dest, Source, Vertices, Edges, Forced); + {false,Status} -> + %% V does not exist in Dest. + merge(add_vertex(Dest, V, Status), + Source, Vertices, Edges, Forced); + {unique,aliased} -> + %% Alias in Dest. + merge(set_status(V, aliased, Dest), Source, + Vertices, Edges, Forced); + {aliased,unique} -> + %% V has to be revisited and non-aliased copied parts will + %% be aliased. + merge(Dest, Source, Vertices, Edges, sets:add_element(V, Forced)) + end; +merge(Dest0, _Source, [], Edges, Forced) -> + merge1(Dest0, _Source, sets:to_list(Edges), + sets:new([{version,2}]), Forced). 
+ +merge1(Dest0, Source, [{plain,To,Lbl}|Edges], Fixups, Forced) -> + ?DP(" Adding edge ~p -> ~p, lbl: ~p~n", [plain,To,Lbl]), + Dest = add_edge(Dest0, plain, To, Lbl), + merge1(Dest, Source, Edges, Fixups, Forced); +merge1(Dest0, Source, [{From,To,Lbl}=Edge|Edges], Fixups, Forced) -> + ?DP(" Adding edge ~p -> ~p, lbl: ~p~n", [From,To,Lbl]), + OutEdges = beam_digraph:out_edges(Dest0, From), + case {ordsets:is_element(Edge, OutEdges),ordsets:is_empty(OutEdges)} of + {true,_} -> + ?DP(" Already exists~n"), + merge1(Dest0, Source, Edges, Fixups, Forced); + {false,true} -> + ?DP(" Edge is new~n"), + Dest = add_edge(Dest0, From, To, Lbl), + merge1(Dest, Source, Edges, Fixups, Forced); + {false,false} -> + ?DP(" There are already edges from the node.~n"), + ?DP(" lbl: ~p, out_edges: ~p~n", [Lbl,OutEdges]), + case lists:keyfind(Lbl, 3, OutEdges) of + false -> + ?DP(" There are no edges with the same label.~n"), + Dest = add_edge(Dest0, From, To, Lbl), + merge1(Dest, Source, Edges, Fixups, Forced); + _ -> + ?DP(" There is at least one edge with the same label.~n"), + %% Add the edge, but alias all with the same label. + Dest = add_edge(Dest0, From, To, Lbl), + merge1(Dest, Source, Edges, + sets:add_element({alias,From,Lbl}, Fixups), Forced) + end + end; +merge1(Dest, _Source, [], Fixups, Forced) -> + merge2(Dest, _Source, sets:to_list(Fixups), Forced). + +merge2(Dest0, _Source, [{alias,From,Lbl}|Fixups], Forced) -> + OutEdges = beam_digraph:out_edges(Dest0, From), + Dest = foldl(fun({#b_var{},To,L}, Acc) when L =:= Lbl -> + set_status(To, aliased, Acc); + (_, Acc) -> + Acc + end, Dest0, OutEdges), + merge2(Dest, _Source, Fixups, Forced); +merge2(Dest0, _Source, [], Forced) -> + Dest = foldl(fun set_alias/2, + Dest0, + [get_alias_edges(V, Dest0) || V <- sets:to_list(Forced)]), + ?assert_state(Dest). 
+ +accumulate_edges(V, State, Edges0) -> + InEdges = beam_digraph:in_edges(State, V), + OutEdges = beam_digraph:out_edges(State, V), + foldl(fun sets:add_element/2, + foldl(fun sets:add_element/2, Edges0, InEdges), + OutEdges). + +-spec new() -> sharing_state(). +new() -> + beam_digraph:new(). + +-spec prune(sets:set(beam_ssa:b_var()), sharing_state()) -> sharing_state(). +prune(LiveVars, State) -> + ?assert_state(State), + ?DP("Pruning to ~p~n", [sets:to_list(LiveVars)]), + ?DBG(dump(State)), + R = prune(sets:to_list(LiveVars), [], new(), State), + ?DP("Pruned result~n"), + ?DBG(dump(R)), + ?assert_state(R). + +prune([V|Wanted], Edges, New0, Old) -> + case beam_digraph:has_vertex(New0, V) of + true -> + %% This variable is already added. + prune(Wanted, Edges, New0, Old); + false -> + %% This variable has to be kept. Add it to the new graph. + New = add_vertex(New0, V, beam_digraph:vertex(Old, V)), + %% Add all incoming edges to this node. + InEdges = beam_digraph:in_edges(Old, V), + InNodes = [From || {From,_,_} <- InEdges], + prune(InNodes ++ Wanted, InEdges ++ Edges, New, Old) + end; +prune([], Edges, New0, _Old) -> + foldl(fun({Src,Dst,Lbl}, New) -> + add_edge(New, Src, Dst, Lbl) + end, New0, Edges). + +-spec set_call_result(beam_ssa:b_var(), call_in_arg_status(), + sharing_state(), non_neg_integer()) -> + {sharing_state(),non_neg_integer()}. +set_call_result(Dst, aliased, SS0, Cnt0) -> + {set_alias([Dst], SS0), Cnt0}; +set_call_result(_Dst, unique, SS0, Cnt0) -> + {SS0, Cnt0}; +set_call_result(_Dst, no_info, SS0, Cnt0) -> + {SS0, Cnt0}; +set_call_result(Dst, {unique,Elements}, SS0, Cnt0) -> + maps:fold(fun(Idx, Element, {SS,Cnt}) -> + V = #b_var{name=Cnt}, + SS1 = add_var(V, unique, SS), + ?ASSERT(case Idx of + hd -> ok; + tl -> ok; + E when is_integer(E), E >= 0 -> ok + end), + SS2 = beam_digraph:add_edge(SS1, V, Dst, {embed,Idx}), + set_call_result(V, Element, SS2, Cnt+1) + end, {SS0,Cnt0}, Elements).
+ +-spec set_status(beam_ssa:b_var(), sharing_status(), sharing_state()) -> + sharing_state(). +set_status(#b_var{}=V, Status, State0) -> + %% Aliasing a variable does the following: + %% + %% * All extracted elements become aliased (out-edges). + %% + %% * If the variable contains embeddings of other variables + %% (in-edge label is 'embed' or {embed,_}), the embedded + %% variables become aliased. + %% + %% * If the variable is extracted from another variable (it has + %% in-edges matching {extract,_}), the aliasing does not change + %% the status of the source variables. + %% + %% * If the variable is embedded in a term (out-edge label is + %% 'embed' or {embed,_}), the status of the variable in which it + %% is embedded remains unchanged. + + ?DP("Setting status of ~p to ~p~n", [V,Status]), + ?ASSERT(assert_variable_exists(V, State0)), + case beam_digraph:vertex(State0, V) of + Status -> + %% Status is unchanged. + State0; + unique when Status =:= aliased -> + State = add_vertex(State0, V, Status), + set_alias(get_alias_edges(V, State), State) + end. + +set_alias([#b_var{}=V|Vars], State0) -> + %% TODO: fold into the above + case beam_digraph:vertex(State0, V) of + aliased -> + set_alias(Vars, State0); + _ -> + State = add_vertex(State0, V, aliased), + set_alias(get_alias_edges(V, State) ++ Vars, State) + end; +set_alias([], State) -> + State. + +get_alias_edges(V, State) -> + OutEdges = [To || {#b_var{},To,_} <- beam_digraph:out_edges(State, V)], + EmbedEdges = [Src + || {#b_var{}=Src,_,Lbl} <- beam_digraph:in_edges(State, V), + case Lbl of + embed -> true; + {embed,_} -> true; + _ -> false + end], + EmbedEdges ++ OutEdges. + +-spec variables(sharing_state()) -> [beam_ssa:b_var()]. +variables(State) -> + %% TODO: Sink this beam_digraph to avoid splitting the list? + [V || {V,_Lbl} <- beam_digraph:vertices(State)]. + +-type call_in_arg_status() :: no_info + | unique + | aliased + | {unique, term()}. +-type call_in_arg_info() :: [call_in_arg_status()]. 
+-spec initialize_in_args(['unique' | 'aliased']) -> call_in_arg_info(). +initialize_in_args(ArgInfo) -> + ArgInfo. + +-spec meet_in_args([call_in_arg_status()]) -> call_in_arg_status(). +meet_in_args(Args) -> + ?DP("meet_in_args(~p)~n", [Args]), + case Args of + [Arg] -> + Arg; + [Arg|Rest] -> + meet_in_args(Arg, Rest) + end. + +meet_in_args(aliased, _) -> + aliased; +meet_in_args(_Status, [aliased|_Rest]) -> + aliased; +meet_in_args(unique, [unique|Rest]) -> + meet_in_args(unique, Rest); +meet_in_args({unique,A}, [{unique,B}|Rest]) -> + meet_in_args({unique,meet_in_args_elems(A, B)}, Rest); +meet_in_args({unique,_}=A, [unique|Rest]) -> + meet_in_args(A, Rest); +meet_in_args(unique, [{unique,_}=U|Rest]) -> + meet_in_args(U, Rest); +meet_in_args(no_info, [Status|Rest]) -> + meet_in_args(Status, Rest); +meet_in_args(Status, [no_info|Rest]) -> + meet_in_args(Status, Rest); +meet_in_args(Status, []) -> + ?DP("meet_in_args(...)->~p.~n", [Status]), + Status. + +meet_in_args_elems(A, B) -> + SizeA = map_size(A), + SizeB = map_size(B), + if SizeA < SizeB -> + meet_in_args_elems1(A, B); + true -> + meet_in_args_elems1(B, A) + end. + +meet_in_args_elems1(Small, Large) -> + meet_in_args_elems1(maps:to_list(Small), Large, #{}). + +meet_in_args_elems1([{E,Status0}|Elems], Large0, Result0) -> + {Status,Large} = case Large0 of + #{E:=OtherStatus} -> + {meet_in_args(Status0, [OtherStatus]), + maps:remove(E,Large0)}; + #{} -> + {Status0,Large0} + end, + Result = Result0#{E=>Status}, + meet_in_args_elems1(Elems, Large, Result); +meet_in_args_elems1([], Large, Result) -> + maps:merge(Large, Result). + +-spec merge_in_args([beam_ssa:b_var()], call_in_arg_info(), sharing_state()) + -> call_in_arg_info(). +merge_in_args([Arg|Args], [ArgStatus|Status], State) -> + ?DP(" merge_in_arg: ~p~n current: ~p~n SS: ~p.~n", + [Arg,ArgStatus,State]), + Info = merge_in_arg(Arg, ArgStatus, ?ARGS_DEPTH_LIMIT, State), + [Info|merge_in_args(Args, Status, State)]; +merge_in_args([], [], _State) -> + []. 
+
+%% Computes the merged status of one call argument.
+%%
+%% The first argument is the value passed at the call site (a
+%% #b_var{}, a #b_literal{} or the atom `plain`), the second is the
+%% status recorded so far, the third is the remaining depth budget
+%% and the last is the sharing state. A status which is already
+%% `aliased` stays `aliased`.
+merge_in_arg(_, aliased, _, _State) ->
+    aliased;
+merge_in_arg(plain, _, _, _State) ->
+    unique;
+merge_in_arg(#b_var{}=V, _Status, 0, State) ->
+    %% We will not traverse this argument further, this means that no
+    %% element-level aliasing info will be kept for this element.
+    get_status(V, State);
+merge_in_arg(#b_var{}=V, Status, Cutoff, State) ->
+    case beam_digraph:vertex(State, V) of
+        aliased ->
+            aliased;
+        unique ->
+            InEdges = beam_digraph:in_edges(State, V),
+            Elements = case Status of
+                           unique -> #{};
+                           {unique,Es} -> Es;
+                           no_info -> #{}
+                       end,
+            merge_elements(InEdges, Elements, Cutoff, State)
+    end;
+merge_in_arg(#b_literal{val=[Hd|Tl]}, Status, Cutoff, State) ->
+    {HdS,TlS,Elements0} =
+        case Status of
+            {unique,Known} ->
+                %% Keep what is already known about the head and the
+                %% tail, also when only one of them has been
+                %% recorded. Falling back to no_info for both would
+                %% silently discard a recorded status (for example
+                %% `aliased`) for the one that is present.
+                {maps:get(hd, Known, no_info),
+                 maps:get(tl, Known, no_info),
+                 Known};
+            unique -> {unique,unique,#{}};
+            no_info -> {no_info,no_info,#{}}
+        end,
+    Elements =
+        Elements0#{hd=>merge_in_arg(#b_literal{val=Hd}, HdS, Cutoff-1, State),
+                   tl=>merge_in_arg(#b_literal{val=Tl}, TlS, Cutoff-1, State)},
+    {unique,Elements};
+merge_in_arg(#b_literal{val=[]}, Status, _, _State) ->
+    Status;
+merge_in_arg(#b_literal{val=T}, Status, Cutoff, State) when is_tuple(T) ->
+    SrcElements = tuple_to_list(T),
+    OrigElements = case Status of
+                       {unique,TupleElems} ->
+                           TupleElems;
+                       unique -> #{};
+                       no_info -> #{}
+                   end,
+    Elements = merge_tuple_elems(SrcElements, OrigElements, Cutoff, State),
+    {unique,Elements};
+merge_in_arg(#b_literal{val=_Lit}, {unique,_}, _Cutoff, _State) ->
+    ?ASSERT(true = (is_atom(_Lit) orelse is_number(_Lit) orelse is_map(_Lit)
+                    orelse is_bitstring(_Lit) orelse is_function(_Lit))),
+    unique;
+merge_in_arg(#b_literal{val=_Lit}, no_info, _, _State) ->
+    ?ASSERT(true = (is_atom(_Lit) orelse is_number(_Lit) orelse is_map(_Lit)
+                    orelse is_bitstring(_Lit) orelse is_function(_Lit))),
+    unique;
+merge_in_arg(#b_literal{val=_Lit}, unique, _, _State) ->
+    ?ASSERT(true = (is_atom(_Lit) orelse is_number(_Lit) orelse is_map(_Lit)
+                    orelse is_bitstring(_Lit) orelse is_function(_Lit))),
+    unique.
+
+%% Merges the elements of a literal tuple, in order and starting at
+%% index 0, into the element map recorded so far.
+merge_tuple_elems(SrcElements, OrigElements, Cutoff, State) ->
+    merge_tuple_elems(SrcElements, 0, OrigElements, Cutoff-1, State).
+
+merge_tuple_elems([S|SrcElements], Idx, OrigElements0, Cutoff, State) ->
+    %% An element without a recorded status is merged as no_info.
+    Status = maps:get(Idx, OrigElements0, no_info),
+    E = merge_in_arg(#b_literal{val=S}, Status, Cutoff, State),
+    OrigElements = OrigElements0#{Idx=>E},
+    merge_tuple_elems(SrcElements, Idx+1, OrigElements, Cutoff, State);
+merge_tuple_elems([], _, Elements, _, _) ->
+    Elements.
+
+%% Folds over the in-edges of a unique variable and builds its
+%% status. The second argument is either the element map collected so
+%% far or `no_info` when element-level information has been lost.
+merge_elements([], Elements, _, _State) when map_size(Elements) =:= 0 ->
+    unique;
+merge_elements([], Elements, _, _State) when is_map(Elements) ->
+    {unique,Elements};
+merge_elements([], no_info, _, _State) ->
+    %% We're lacking sub-element info, but all embedded elements were
+    %% unique.
+    unique;
+merge_elements([{Src,_,{embed,Idx}}|Rest], Elements0, Cutoff, State) when
+      is_map(Elements0) ->
+    Old = maps:get(Idx, Elements0, no_info),
+    New = merge_in_arg(Src, Old, Cutoff-1, State),
+    Elements = Elements0#{Idx=>New},
+    merge_elements(Rest, Elements, Cutoff, State);
+merge_elements([{_Src,_,embed}|Rest], _Elements0, Cutoff, State) ->
+    %% We don't know where this element is embedded. Src will always
+    %% be unique as otherwise merge_in_arg/4 will not bother merging
+    %% the in-edges.
+    ?ASSERT(unique = get_status(_Src, State)),
+    merge_elements(Rest, no_info, Cutoff, State);
+merge_elements([{Src,_,{embed,_Idx}}|Rest], no_info, Cutoff, State) ->
+    %% Element-level info was dropped by an earlier edge without a
+    %% known position, so the index can no longer be recorded. Stay
+    %% conservative: if the embedded value is aliased the whole
+    %% argument is reported as aliased, otherwise carry on without
+    %% element-level info.
+    case merge_in_arg(Src, no_info, 0, State) of
+        aliased ->
+            aliased;
+        _ ->
+            merge_elements(Rest, no_info, Cutoff, State)
+    end;
+merge_elements([{_,V,{extract,_}}|_Rest], _Elements0, _, State) ->
+    %% For now we don't try to derive the structure of this argument
+    %% further.
+    %% TODO: Revisit the decision above.
+    get_status(V, State).
+
+-spec new([beam_ssa:b_var()], call_in_arg_info(), non_neg_integer()) ->
+          {sharing_state(), non_neg_integer()}.
+new(Args, ArgsInfo, Cnt) ->
+    ?DP("new args: ~p, args-info: ~p, cnt: ~p~n", [Args, ArgsInfo, Cnt]),
+    {State,_NextCnt} = Result = new(Args, ArgsInfo, Cnt, new()),
+    ?assert_state(State),
+    Result.
+
+%% Adds one vertex per argument. An argument without any information
+%% starts out as unique; an argument with element-level information
+%% also gets a fresh variable for each known element.
+new([Arg|Args], [no_info|Infos], Cnt, State) ->
+    new(Args, Infos, Cnt, add_var(Arg, unique, State));
+new([Arg|Args], [Info|Infos], Cnt, State)
+  when Info =:= aliased; Info =:= unique ->
+    new(Args, Infos, Cnt, add_var(Arg, Info, State));
+new([Arg|Args], [{unique,Elements}|Infos], Cnt0, State0) ->
+    State1 = add_var(Arg, unique, State0),
+    {State,Cnt} = init_elements(Elements, Arg, Cnt0, State1),
+    new(Args, Infos, Cnt, State);
+new([], [], Cnt, State) ->
+    {State,Cnt}.
+
+%% Creates the elements known to be embedded in V, threading the
+%% sharing state and the variable counter through the fold.
+init_elements(Elements, V, Cnt0, SS0) ->
+    AddElement = fun(Idx, Status, {SS,Cnt}) ->
+                         init_element(Idx, Status, V, Cnt, SS)
+                 end,
+    maps:fold(AddElement, {SS0,Cnt0}, Elements).
+
+%% Creates a fresh variable, named after the counter, for the element
+%% at Idx and records that it is embedded in Container.
+init_element(Idx, Status, Container, Cnt0, SS0) ->
+    ElemVar = #b_var{name=Cnt0},
+    {SS,Cnt} = new([ElemVar], [Status], Cnt0+1, SS0),
+    ?ASSERT(case Idx of
+                hd -> ok;
+                tl -> ok;
+                I when is_integer(I), I >= 0 -> ok
+            end),
+    {beam_digraph:add_edge(SS, ElemVar, Container, {embed,Idx}),Cnt}.
+
+%% Internal helpers
+
+-spec has_out_edges(beam_ssa:b_var(), sharing_state()) -> boolean().
+has_out_edges(V, State) ->
+    case beam_digraph:out_edges(State, V) of
+        [] -> false;
+        [_|_] -> true
+    end.
+
+%% Debug support below
+
+-ifdef(EXTRA_ASSERTS).
+
+-spec assert_state(sharing_state()) -> sharing_state().
+
+%% Runs all consistency checks, in order, and returns the state
+%% unchanged. A failing check throws.
+assert_state(State) ->
+    Checks = [fun assert_aliased_parent_implies_aliased/1,
+              fun assert_embedded_in_aliased_implies_aliased/1,
+              fun assert_multiple_embeddings_force_aliasing/1,
+              fun assert_multiple_extractions_force_aliasing/1],
+    lists:foreach(fun(Check) -> Check(State) end, Checks),
+    State.
+
+%% Check that extracted and embedded elements of an aliased variable
+%% are aliased.
+assert_aliased_parent_implies_aliased(State) ->
+    Aliased = [V || {V,aliased} <- beam_digraph:vertices(State)],
+    [assert_apia(V, State) || V <- Aliased].
+
+%% Checks, for the aliased variable Parent, that every vertex reached
+%% through an `{extract,_}` or a position-less `embed` out-edge is
+%% aliased. Out-edges labelled `{embed,_}` are not checked.
+assert_apia(Parent, State) ->
+    Children = [Child
+                || {_,Child,Info} <- beam_digraph:out_edges(State, Parent),
+                   case Info of
+                       {extract,_} -> true;
+                       embed -> true;
+                       {embed,_} -> false
+                   end],
+    [case beam_digraph:vertex(State, Child) of
+         aliased ->
+             ok;
+         _ ->
+             io:format("Expected ~p to be aliased as is derived from ~p.~n"
+                       "state: ~p", [Child, Parent, State]),
+             throw(assertion_failure)
+     end || Child <- Children].
+
+%% Check that values which are embedded in an aliased value are
+%% themselves aliased.
+assert_embedded_in_aliased_implies_aliased(State) ->
+    [assert_eiaia(A, State) || {A,aliased} <- beam_digraph:vertices(State)].
+
+%% Only in-edges labelled with a position-less `embed` are inspected.
+assert_eiaia(Embedder, State) ->
+    NotAliased = [ Src
+                   || {Src,_,embed} <- beam_digraph:in_edges(State, Embedder),
+                      beam_digraph:vertex(State, Src) =/= aliased],
+    case NotAliased of
+        [] ->
+            State;
+        _ ->
+            io:format("Expected ~p to be aliased as"
+                      " they are embedded in aliased values.~n~p.~n",
+                      [NotAliased, State]),
+            throw(assertion_failure)
+    end.
+
+%% Check that elements which are embedded twice or more times are
+%% aliased.
+assert_multiple_embeddings_force_aliasing(State) ->
+    [assert_mefa(V, State) || {V,unique} <- beam_digraph:vertices(State)].
+
+%% Fails when the unique variable V has two or more position-less
+%% `embed` out-edges.
+%% NOTE(review): B is the first element of an out-edge of V, which
+%% looks like V itself; if so the filter below never removes anything
+%% and only the number of `embed` edges matters -- confirm against
+%% beam_digraph:out_edges/2.
+assert_mefa(V, State) ->
+    NotAliased = [ B || {B,_,embed} <- beam_digraph:out_edges(State, V),
+                        beam_digraph:vertex(State, B) =/= aliased],
+    case NotAliased of
+        [_,_|_] ->
+            io:format("Expected ~p in ~p to be aliased.~n", [V,State]),
+            throw(assertion_failure);
+        _ ->
+            State
+    end.
+
+%% Check that elements which are extracted twice are aliased.
+assert_multiple_extractions_force_aliasing(State) ->
+    [assert_mxfa(V, State) || {V,_} <- beam_digraph:vertices(State)].
+
+assert_mxfa(V, State) ->
+    %% Build a map of the extracted values keyed by element.
+    Extracted = foldl(
+                  fun({_,Other,{extract,Elem}}, Acc)
+                        when is_integer(Elem); Elem =:= hd; Elem =:= tl ->
+                          Acc#{Elem=>[Other|maps:get(Elem, Acc, [])]};
+                     ({_,_,embed}, Acc) ->
+                          Acc;
+                     ({_,_,{embed,_}}, Acc) ->
+                          Acc
+                  end, #{}, beam_digraph:out_edges(State, V)),
+    %% Collect the variables which share an element with at least one
+    %% other extraction but are not aliased.
+    Bad = maps:fold(
+            fun(_Elem, [_,_|_]=Vars, Acc) ->
+                    [X || X <- Vars, beam_digraph:vertex(State, X) =/= aliased]
+                        ++ Acc;
+               (_, _, Acc) ->
+                    Acc
+            end, [], Extracted),
+    case Bad of
+        [] ->
+            State;
+        _ ->
+            %% Report the offending variables, not just the value
+            %% they were extracted from.
+            io:format("~p should be aliased as the same element is"
+                      " extracted from ~p more than once~nstate:~p.~n",
+                      [Bad,V,State]),
+            throw(assertion_failure)
+    end.
+
+%% Checks that the given value is known to the sharing state and
+%% returns the state. `plain` must also be unique; literals always
+%% pass.
+assert_variable_exists(plain, State) ->
+    case beam_digraph:has_vertex(State, plain) of
+        false ->
+            io:format("Expected ~p in ~p.~n", [plain,State]),
+            throw(assertion_failure);
+        _ ->
+            case beam_digraph:vertex(State, plain) of
+                unique -> State;
+                Other ->
+                    io:format("Expected plain in ~p to be unique,"
+                              " was ~p.~n", [State,Other]),
+                    throw(assertion_failure)
+            end
+    end;
+assert_variable_exists(#b_literal{}, State) ->
+    State;
+assert_variable_exists(#b_var{}=V, State) ->
+    case beam_digraph:has_vertex(State, V) of
+        false ->
+            io:format("Expected ~p in ~p.~n", [V,State]),
+            throw(assertion_failure);
+        _ ->
+            State
+    end.
+
+%% Prints the sharing state.
+dump(State) ->
+    io:format("~p~n", [State]).
+
+-endif.
diff --git a/lib/compiler/src/beam_ssa_type.erl b/lib/compiler/src/beam_ssa_type.erl
index 33b79139ca45..b6ba31d63f2f 100644
--- a/lib/compiler/src/beam_ssa_type.erl
+++ b/lib/compiler/src/beam_ssa_type.erl
@@ -651,6 +651,21 @@ benefits_from_type_anno(get_map_element, _Args) ->
     true;
 benefits_from_type_anno(has_map_field, _Args) ->
     true;
+
+%% The types are used to avoid falsely detecting aliasing of
+%% non-boxed things.
+benefits_from_type_anno(put_list, _Args) -> + true; +benefits_from_type_anno(put_tuple, _Args) -> + true; +benefits_from_type_anno(get_tuple_element, _Args) -> + true; +benefits_from_type_anno(get_hd, _Args) -> + true; +benefits_from_type_anno(get_tl, _Args) -> + true; +benefits_from_type_anno(update_record, _Args) -> + true; benefits_from_type_anno(_Op, _Args) -> false. diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl index c3e401418ea1..4afbdf0a6b62 100644 --- a/lib/compiler/src/beam_validator.erl +++ b/lib/compiler/src/beam_validator.erl @@ -1503,7 +1503,9 @@ pmt_1([Key0, Value0 | List], Vst, Acc0) -> pmt_1([], _Vst, Acc) -> Acc. -verify_update_record(Size, Src, Dst, List, Vst0) -> +verify_update_record(Size, Src0, Dst, List0, Vst0) -> + Src = unpack_typed_arg(Src0, Vst0), + List = [unpack_typed_arg(Arg, Vst0) || Arg <- List0], assert_type(#t_tuple{exact=true,size=Size}, Src, Vst0), verify_y_init(Vst0), diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl index 36b67a37fb7e..b0b4ea29b05e 100644 --- a/lib/compiler/src/compile.erl +++ b/lib/compiler/src/compile.erl @@ -2951,11 +2951,12 @@ pre_load() -> beam_ssa_bsm, beam_ssa_codegen, beam_ssa_dead, + beam_ssa_destructive_update, beam_ssa_opt, beam_ssa_pre_codegen, - beam_ssa_private_append, beam_ssa_recv, beam_ssa_share, + beam_ssa_ss, beam_ssa_throw, beam_ssa_type, beam_trim, diff --git a/lib/compiler/src/compiler.app.src b/lib/compiler/src/compiler.app.src index 3190b43468c7..c793c22f3ad3 100644 --- a/lib/compiler/src/compiler.app.src +++ b/lib/compiler/src/compiler.app.src @@ -44,13 +44,14 @@ beam_ssa_check, beam_ssa_codegen, beam_ssa_dead, + beam_ssa_destructive_update, beam_ssa_lint, beam_ssa_opt, beam_ssa_pp, beam_ssa_pre_codegen, - beam_ssa_private_append, beam_ssa_recv, beam_ssa_share, + beam_ssa_ss, beam_ssa_throw, beam_ssa_type, beam_trim, diff --git a/lib/compiler/test/beam_ssa_check_SUITE.erl b/lib/compiler/test/beam_ssa_check_SUITE.erl index 
c1d185cf6831..43e3675f31e7 100644 --- a/lib/compiler/test/beam_ssa_check_SUITE.erl +++ b/lib/compiler/test/beam_ssa_check_SUITE.erl @@ -34,7 +34,8 @@ bs_size_unit_checks/1, private_append_checks/1, ret_annotation_checks/1, - sanity_checks/1]). + sanity_checks/1, + tuple_inplace_checks/1]). suite() -> [{ct_hooks,[ts_install_cth]}]. @@ -48,7 +49,8 @@ groups() -> appendable_checks, private_append_checks, ret_annotation_checks, - sanity_checks]}, + sanity_checks, + tuple_inplace_checks]}, {post_ssa_opt_dynamic,test_lib:parallel(), [bs_size_unit_checks]}]. @@ -99,6 +101,9 @@ bs_size_unit_checks(Config) when is_list(Config) -> private_append_checks(Config) when is_list(Config) -> run_post_ssa_opt(private_append, Config). +tuple_inplace_checks(Config) when is_list(Config) -> + run_post_ssa_opt(tuple_inplace_checks, Config). + ret_annotation_checks(Config) when is_list(Config) -> run_post_ssa_opt(ret_annotation, Config). diff --git a/lib/compiler/test/beam_ssa_check_SUITE_data/alias.erl b/lib/compiler/test/beam_ssa_check_SUITE_data/alias.erl index fffa5a779f82..5ae1bdfe7ce9 100644 --- a/lib/compiler/test/beam_ssa_check_SUITE_data/alias.erl +++ b/lib/compiler/test/beam_ssa_check_SUITE_data/alias.erl @@ -19,7 +19,7 @@ %% This module tests that beam_ssa_alias_opt:opt/2 correctly annotates %% instructions with information about unique and aliased operands. %% --compile(no_ssa_opt_private_append). +-compile(no_ssa_opt_destructive_update). -module(alias). 
@@ -68,6 +68,16 @@ binary_part_aliases/2, aliased_map_lookup_bif/1, aliased_map_lookup_instr/1, + + variables_in_put_tuple_unique_0/1, + variables_in_put_tuple_unique_1/1, + variables_in_put_tuple_unique_2/1, + variables_in_put_tuple_unique_3/1, + variables_in_put_tuple_unique_4/1, + variables_in_put_tuple_unique_5/1, + variables_in_put_tuple_unique_6/1, + variables_in_put_tuple_aliased/1, + aliased_tuple_element_bif/1, aliased_tuple_element_bif/2, aliased_tuple_element_instr/1, @@ -78,11 +88,24 @@ aliasing_after_tuple_extract/1, alias_after_pair_hd/1, alias_after_pair_tl/1, + unique_pair/0, + make_unique_pair/1, double_map_lookup/2, double_tuple_element/2, tuple_element_aliasing/0, - tuple_element_from_tuple_with_existing_child/0]). + tuple_element_from_tuple_with_existing_child/0, + + extract_tuple_element/0, + + update_record0/0, + update_record1/0, + + live_past_call_triggers_aliasing/1, + + fuzz0/0, fuzz0/1, + alias_after_phi/0, + check_identifier_type/0]). %% Trivial smoke test transformable0(L) -> @@ -182,7 +205,11 @@ transformable5(L) -> transformable5([H|T], Acc) -> %ssa% (_, Arg1) when post_ssa_opt -> -%ssa% _ = bs_create_bin(append, _, Arg1, _, _, _, X, _) { aliased => [X], unique => [Arg1], first_fragment_dies => true }. +%ssa% _ = bs_create_bin(append, _, Arg1, _, _, _, X, _) { aliased => [Arg1,X] }. + +%% Although does_not_escape/1 does not let its argument escape, it is +%% live across the call and thus aliased in does_not_escape. + does_not_escape(Acc), transformable5(T, <>); transformable5([], Acc) -> @@ -367,16 +394,15 @@ transformable17([H|T], [N|Acc]) -> transformable17([], Acc) -> Acc. -%% We should use type information to figure out that {<<>>, X} is not -%% aliased, but as of now we don't have the information at this pass, -%% nor do we track alias status at the sub-term level. +%% Check that type information is used to figure out that {<<>>, X} is +%% not aliased. 
transformable18(L, X) when is_integer(X), X < 256 -> transformable18b(L, {<<>>, X}). transformable18b([H|T], {Acc,X}) -> %ssa% (_, Arg1) when post_ssa_opt -> %ssa% A = get_tuple_element(Arg1, 0), -%ssa% B = bs_create_bin(append, _, A, _, _, _, X, _) { aliased => [A], unique => [X], first_fragment_dies => true }, +%ssa% B = bs_create_bin(append, _, A, _, _, _, X, _) { unique => [X,A], first_fragment_dies => true }, %ssa% C = put_tuple(B, _), %ssa% _ = call(fun transformable18b/2, _, C). transformable18b(T, {<>, X}); @@ -483,15 +509,18 @@ make_empty_binary_tuple_nested() -> {<<>>, {<<>>}, 47}. transformable24(L) -> - transformable24(L, {<<>>, ex:foo()}). + transformable24(L, {<<>>, ex:foo(),ex:foo()}). -transformable24([H|T], {Acc,X}) -> +transformable24([H|T], {Acc,X,Y}) -> %ssa% (_, Arg1) when post_ssa_opt -> -%ssa% A = get_tuple_element(Arg1, 0), -%ssa% B = bs_create_bin(append, _, A, _, _, _, X, _) { aliased => [A], unique => [X], first_fragment_dies => true }, -%ssa% C = put_tuple(B, _), -%ssa% _ = call(fun transformable24/2, _, C). - transformable24(T, {<>, X}); +%ssa% X = get_tuple_element(Arg1, 1), +%ssa% Acc = get_tuple_element(Arg1, 0), +%ssa% A = bs_create_bin(append, _, Acc, _, _, _, Sum, _) { unique => [Sum,Acc], first_fragment_dies => true }, +%ssa% Y = get_tuple_element(Arg1, 2), +%% X is unique as it is known to be a number. +%ssa% B = put_tuple(A, X, Y) { aliased => [Y], unique => [A,X] }, +%ssa% _ = call(fun transformable24/2, C, B) { aliased => [C], unique => [B] }. + transformable24(T, {<>,X,Y}); transformable24([], {Acc,_}) -> Acc. @@ -817,6 +846,23 @@ alias_after_pair_tl(N, Acc) -> [_|X] = Acc, alias_after_pair_tl(N - 1, [Acc|<>]). +make_unique_pair(X) when is_integer(X) -> + [X|X]. + +%% No aliasing occurs as Pair dies and only plain values are +%% extracted. 
+unique_pair() -> +%ssa% () when post_ssa_opt -> +%ssa% P = call(fun make_unique_pair/1, ...), +%ssa% H = get_hd(P), +%ssa% T = get_tl(P), +%ssa% R = put_tuple(H, T, P) {unique => [H, P, T]}, +%ssa% ret(R) {unique => [R]}. + Pair = make_unique_pair(e:f()), + H = hd(Pair), + T = tl(Pair), + {H, T, Pair}. + %% Check that although the map is unique, the extracted values should %% always be aliased as we can't know if they are the same. %% @@ -873,3 +919,222 @@ tuple_element_from_tuple_with_existing_child() -> ok end } ]. +%% Check that the same variable used twice in a put_tuple does not +%% trigger aliasing when the variable's type isn't boxed, but that we +%% do when the types are unknown. These tests need their own private +%% version of the identity function, as the type analysis will +%% otherwise determine that the result type of id/1 (as it is shared +%% between all tests) is {variables_in_put_tuple, any(), any()} and +%% the more specific type information will be lost. +-record(variables_in_put_tuple, {a=0,b=0}). + +variables_in_put_tuple_unique_0(A) when is_atom(A) -> +%ssa% (A0) when post_ssa_opt -> +%ssa% T0 = put_tuple(_, A0, A0), +%ssa% T = call(_, T0), +%ssa% A = get_tuple_element(T, 1), +%ssa% B = get_tuple_element(T, 2), +%ssa% R = put_tuple(A, B) { unique => [B,A] }, +%ssa% ret(R) { unique => [R] }. + Id = fun(X) -> X end, + #variables_in_put_tuple{a=X,b=Y} = Id(#variables_in_put_tuple{a=A,b=A}), + {X,Y}. + +variables_in_put_tuple_unique_1(A) when is_number(A) -> +%ssa% (A0) when post_ssa_opt -> +%ssa% T0 = put_tuple(_, A0, A0), +%ssa% T = call(_, T0), +%ssa% A = get_tuple_element(T, 1), +%ssa% B = get_tuple_element(T, 2), +%ssa% R = put_tuple(A, B) { unique => [B,A] }, +%ssa% ret(R) { unique => [R] }. + Id = fun(X) -> X end, + #variables_in_put_tuple{a=X,b=Y} = Id(#variables_in_put_tuple{a=A,b=A}), + {X,Y}. 
+ +variables_in_put_tuple_unique_2(A) when is_integer(A) -> +%ssa% (A0) when post_ssa_opt -> +%ssa% T0 = put_tuple(_, A0, A0), +%ssa% T = call(_, T0), +%ssa% A = get_tuple_element(T, 1), +%ssa% B = get_tuple_element(T, 2), +%ssa% R = put_tuple(A, B) { unique => [B,A] }, +%ssa% ret(R) { unique => [R] }. + Id = fun(X) -> X end, + #variables_in_put_tuple{a=X,b=Y} = Id(#variables_in_put_tuple{a=A,b=A}), + {X,Y}. + +variables_in_put_tuple_unique_3(A) when is_float(A) -> +%ssa% (A0) when post_ssa_opt -> +%ssa% T0 = put_tuple(_, A0, A0), +%ssa% T = call(_, T0), +%ssa% A = get_tuple_element(T, 1), +%ssa% B = get_tuple_element(T, 2), +%ssa% R = put_tuple(A, B) { unique => [B,A] }, +%ssa% ret(R) { unique => [R] }. + Id = fun(X) -> X end, + #variables_in_put_tuple{a=X,b=Y} = Id(#variables_in_put_tuple{a=A,b=A}), + {X,Y}. + +variables_in_put_tuple_unique_4(A) when is_pid(A) -> +%ssa% (A0) when post_ssa_opt -> +%ssa% T0 = put_tuple(_, A0, A0), +%ssa% T = call(_, T0), +%ssa% A = get_tuple_element(T, 1), +%ssa% B = get_tuple_element(T, 2), +%ssa% R = put_tuple(A, B) { unique => [B,A] }, +%ssa% ret(R) { unique => [R] }. + Id = fun(X) -> X end, + #variables_in_put_tuple{a=X,b=Y} = Id(#variables_in_put_tuple{a=A,b=A}), + {X,Y}. + +variables_in_put_tuple_unique_5(A) when is_port(A) -> +%ssa% (A0) when post_ssa_opt -> +%ssa% T0 = put_tuple(_, A0, A0), +%ssa% T = call(_, T0), +%ssa% A = get_tuple_element(T, 1), +%ssa% B = get_tuple_element(T, 2), +%ssa% R = put_tuple(A, B) { unique => [B,A] }, +%ssa% ret(R) { unique => [R] }. + Id = fun(X) -> X end, + #variables_in_put_tuple{a=X,b=Y} = Id(#variables_in_put_tuple{a=A,b=A}), + {X,Y}. + +variables_in_put_tuple_unique_6(A) when is_reference(A) -> +%ssa% (A0) when post_ssa_opt -> +%ssa% T0 = put_tuple(_, A0, A0), +%ssa% T = call(_, T0), +%ssa% A = get_tuple_element(T, 1), +%ssa% B = get_tuple_element(T, 2), +%ssa% R = put_tuple(A, B) { unique => [B,A] }, +%ssa% ret(R) { unique => [R] }. 
+ Id = fun(X) -> X end, + #variables_in_put_tuple{a=X,b=Y} = Id(#variables_in_put_tuple{a=A,b=A}), + {X,Y}. + +%% Unknown types, no aliasing of the tuple, but the elements should be +%% aliased. +variables_in_put_tuple_aliased(A) -> +%ssa% (A0) when post_ssa_opt -> +%ssa% T0 = put_tuple(_, A0, A0), +%ssa% T = call(_, T0), +%ssa% A = get_tuple_element(T, 1), +%ssa% B = get_tuple_element(T, 2), +%ssa% R = put_tuple(A, B) { aliased => [B,A] }, +%ssa% ret(R) { unique => [R] }. + Id = fun(X) -> X end, + #variables_in_put_tuple{a=X,b=Y} = Id(#variables_in_put_tuple{a=A,b=A}), + {X,Y}. + +id(X) -> + X. + +%% Check that we don't unnecessarily flag a tuple as aliased just +%% because we extract a plain type from it. +generate_integer() -> + case ex:f() of + a -> + 1; + b -> + 2 + end. + +make_tuple() -> + {generate_integer(), generate_integer()}. + +extract_tuple_element() -> +%ssa% () when post_ssa_opt -> +%ssa% R = put_tuple(X, Y, Z) {unique => [Z, Y, X]} , +%ssa% ret(R) { unique => [R] }. + {X,Y} = Z = make_tuple(), + {X,Y,Z}. + +-record(r0, {not_aliased=0,aliased=[]}). + +update_record0() -> + update_record0(ex:f(), #r0{}). + +update_record0([Val|Ls], Acc=#r0{not_aliased=N}) -> +%ssa% (_, Rec) when post_ssa_opt -> +%ssa% _ = update_record(reuse, 3, Rec, 3, A, 2, NA) {unique => [Rec, NA], aliased => [A]}. + R = Acc#r0{not_aliased=N+1,aliased=Val}, + update_record0(Ls, R); +update_record0([], Acc) -> + Acc. + +-record(r1, {not_aliased0=0,not_aliased1=[]}). + +update_record1() -> + update_record1(ex:f(), #r1{}). + +update_record1([Val|Ls], Acc=#r1{not_aliased0=N0,not_aliased1=N1}) -> +%ssa% (_, Rec) when post_ssa_opt -> +%ssa% _ = update_record(reuse, 3, Rec, 3, NA0, 2, NA1) {unique => [Rec, NA1, NA0], source_dies => true}. + R = Acc#r1{not_aliased0=N0+1,not_aliased1=[Val|N1]}, + update_record1(Ls, R); +update_record1([], Acc) -> + Acc. + +live_past_call_triggers_aliasing(A) -> +%%% As X lives past the call to id, X and Y alias each other. 
+%ssa% (A) when post_ssa_opt -> +%ssa% X = put_tuple(A), +%ssa% Y = call(fun id/1, X) { aliased => [X] }, +%ssa% R = put_tuple(X, Y) { aliased => [X,Y] }, +%ssa% ret(R) { unique => [R] }. + X = {A}, + Y = id(X), + {X,Y}. + +%% Check that the alias analysis handles the case where the called +%% function only has a known return status for a result type which is +%% not present in the call. In the example the looked up type is a +%% #t_union{} but a status is only known for 'nil'. +fuzz0(_V0) -> + maybe + [] ?= _V0 + else + _ when ok -> + ok; + [] -> + ok; + _ -> + _V0 + end. + +fuzz0() -> + fuzz0(ok). + +alias_after_phi() -> + alias_after_phi({e:f(),e:f()}). + +alias_after_phi(X) -> +%% Check that X is aliased after the Phi. +%ssa% (Arg0) when post_ssa_opt -> +%ssa% Phi = phi({_,_}, {Arg0,_}, ...), +%ssa% _ = get_tuple_element(Arg0, 0) {aliased => [Arg0]}. + {A,B} = X, + T = case e:f() of + 1 -> + X; + 2 -> + {e:f(),e:f()} + end, + {A,B} = T, + {A,B,X}. + +%% Check that the identifier type is considered plain and therefore +%% unique. +check_identifier_type() -> + R = {case e:f() of + X when is_port(X) -> + X; + X when is_pid(X) -> + X + end}, + should_return_unique(R). + +should_return_unique({X}) -> +%ssa% (_) when post_ssa_opt -> +%ssa% ret(R) { unique => [R] }. + X. diff --git a/lib/compiler/test/beam_ssa_check_SUITE_data/alias_chain.erl b/lib/compiler/test/beam_ssa_check_SUITE_data/alias_chain.erl index 7d46176fb9b3..b12780096f3c 100644 --- a/lib/compiler/test/beam_ssa_check_SUITE_data/alias_chain.erl +++ b/lib/compiler/test/beam_ssa_check_SUITE_data/alias_chain.erl @@ -24,7 +24,7 @@ %% analysis iteration limit if functions are visited naïvely in the %% order they occur in the module. %% --compile(no_ssa_opt_private_append). +-compile(no_ssa_opt_destructive_update). -module(alias_chain). 
diff --git a/lib/compiler/test/beam_ssa_check_SUITE_data/alias_non_convergence.erl b/lib/compiler/test/beam_ssa_check_SUITE_data/alias_non_convergence.erl index c127827a77da..9a77c8ef9092 100644 --- a/lib/compiler/test/beam_ssa_check_SUITE_data/alias_non_convergence.erl +++ b/lib/compiler/test/beam_ssa_check_SUITE_data/alias_non_convergence.erl @@ -21,7 +21,7 @@ %% MAX_REPETITIONS are required for the analysis to converge. %% --compile(no_ssa_opt_private_append). +-compile(no_ssa_opt_destructive_update). -module(alias_non_convergence). diff --git a/lib/compiler/test/beam_ssa_check_SUITE_data/private_append.erl b/lib/compiler/test/beam_ssa_check_SUITE_data/private_append.erl index 13be1882e055..6f4888016362 100644 --- a/lib/compiler/test/beam_ssa_check_SUITE_data/private_append.erl +++ b/lib/compiler/test/beam_ssa_check_SUITE_data/private_append.erl @@ -16,7 +16,7 @@ %% %% %CopyrightEnd% %% -%% This module tests that beam_ssa_alias_opt:to_private_append/3 +%% This module tests that the beam_ssa_destructive_update pass %% rewrites plain appends in bs_create_bin to private_append when %% appropriate. %% @@ -197,15 +197,18 @@ transformable4([H|T], Acc) -> transformable4([], Acc) -> Acc. -%% Check that the alias analysis handles local functions. +%% Check that when a variable is live across a call it is considered +%% aliased. If the alias analysis is extended to track if an argument +%% is captured by the callee, that information could be fed back to +%% the caller. Until that is done, this test is expected to fail. transformable5(L) -> -%ssa% (_) when post_ssa_opt -> +%ssa% xfail (_) when post_ssa_opt -> %ssa% A = bs_init_writable(_), %ssa% _ = call(fun transformable5/2, _, A). transformable5(L, <<>>). transformable5([H|T], Acc) -> -%ssa% (_, Arg1) when post_ssa_opt -> +%ssa% xfail (_, Arg1) when post_ssa_opt -> %ssa% A = bs_create_bin(private_append, _, Arg1, ...), %ssa% _ = call(fun transformable5/2, _, A). 
does_not_escape(Acc), @@ -440,18 +443,17 @@ transformable16([A,B|T], {{Acc0}, Acc1}) -> transformable16([], {{Acc0}, Acc1}) -> {Acc0,Acc1}. -%% We should use type information to figure out that {<<>>, X} is not -%% aliased, but as of now we don't have the information at this pass, -%% nor do we track alias status at the sub-term level. +%% Check that type information is used to figure out that {<<>>, X} is +%% not aliased. transformable18(L, X) when is_integer(X), X < 256 -> -%ssa% xfail (_, _) when post_ssa_opt -> +%ssa% (_, _) when post_ssa_opt -> %ssa% A = bs_init_writable(_), -%ssa% B = put_tuple(_, A), +%ssa% B = put_tuple(A, _), %ssa% _ = call(fun transformable18b/2, _, B). transformable18b(L, {<<>>, X}). transformable18b([H|T], {Acc,X}) -> -%ssa% xfail (_, Arg1) when post_ssa_opt -> +%ssa% (_, Arg1) when post_ssa_opt -> %ssa% A = get_tuple_element(Arg1, 0), %ssa% B = bs_create_bin(private_append, _, A, ...), %ssa% C = put_tuple(B, _), @@ -592,16 +594,15 @@ make_empty_binary_tuple_nested() -> %ssa% ret(D). {<<>>, {<<>>}, 47}. -%% We can't handle this as we do not track alias status at the sub-term level. transformable24(L) -> -%ssa% xfail (_) when post_ssa_opt -> +%ssa% (_) when post_ssa_opt -> %ssa% A = bs_init_writable(_), %ssa% B = put_tuple(A, _), %ssa% _ = call(fun transformable24/2, _, B). transformable24(L, {<<>>, ex:foo()}). transformable24([H|T], {Acc,X}) -> -%ssa% xfail (_, Arg1) when post_ssa_opt -> +%ssa% (_, Arg1) when post_ssa_opt -> %ssa% A = get_tuple_element(Arg1, 0), %ssa% B = bs_create_bin(private_append, _, A, ...), %ssa% C = put_tuple(B, _), @@ -1003,13 +1004,13 @@ bs_create_bin_on_literal() -> >>/binary >>. -%% Check that the beam_ssa_private_append pass doesn't crash, if it, -%% during initial value tracking, ends up in operations which do not -%% create bit strings. This can happen as the initial value tracking -%% in beam_ssa_private_append doesn't consider types. 
As the decision -%% to apply the private append transform is using type information, -%% tracking values into not type-compatible execution paths is -%% harmless. +%% Check that the beam_ssa_destructive_update pass doesn't crash, if +%% it, during initial value tracking, ends up in operations which do +%% not create bit strings. This can happen as the initial value +%% tracking in beam_ssa_destructive_update doesn't consider types. As +%% the decision to apply the private append transform is using type +%% information, tracking values into not type-compatible execution +%% paths is harmless. crash_in_value_tracking_inner(_, 1.0, _) -> %ssa% (_, _, _) when post_ssa_opt -> %ssa% _ = bs_init_writable(_). diff --git a/lib/compiler/test/beam_ssa_check_SUITE_data/sanity_checks.erl b/lib/compiler/test/beam_ssa_check_SUITE_data/sanity_checks.erl index a97c00359b4b..a47665597916 100644 --- a/lib/compiler/test/beam_ssa_check_SUITE_data/sanity_checks.erl +++ b/lib/compiler/test/beam_ssa_check_SUITE_data/sanity_checks.erl @@ -18,7 +18,7 @@ -module(sanity_checks). --compile(no_ssa_opt_private_append). +-compile(no_ssa_opt_destructive_update). -export([check_fail/0, check_wrong_pass/0, diff --git a/lib/compiler/test/beam_ssa_check_SUITE_data/tuple_inplace_checks.erl b/lib/compiler/test/beam_ssa_check_SUITE_data/tuple_inplace_checks.erl new file mode 100644 index 000000000000..cdac0fdad601 --- /dev/null +++ b/lib/compiler/test/beam_ssa_check_SUITE_data/tuple_inplace_checks.erl @@ -0,0 +1,209 @@ +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2023. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. 
+%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +%% Checks which update_record instructions get the inplace hint. +%% +-module(tuple_inplace_checks). + +-export([do0a/0, do0b/2, different_sizes/2, ambiguous_inits/1, + update_record0/0, fc/0, track_update_record/1]). + +-record(r, {a=0,b=0,c=0,tot=0}). + +do0a() -> + Ls = ex:f(), + r0(Ls, #r{}). + +do0b(A, B) -> + Ls = ex:f(), + r0(Ls, #r{a=A+B,b=A+B}). + +r0([{Key,Val}|Updates], Acc=#r{a=A,b=B,c=C,tot=T}) -> +%ssa% (_, Rec) when post_ssa_opt -> +%ssa% _ = update_record(inplace, 5, Rec, ...), +%ssa% _ = update_record(inplace, 5, Rec, ...), +%ssa% _ = update_record(inplace, 5, Rec, ...). + R = case Key of + a -> Acc#r{a=Val + A, tot=T + Val}; + b -> Acc#r{b=Val + B, tot=T + Val}; + c -> Acc#r{c=Val + C, tot=T + Val} + end, + r0(Updates, R); +r0([], Acc) -> + Acc. + +%% Check that the literal returned by make_ds(a) is rewritten to +%% a put_tuple but the result of make_ds(b) is left alone. +-record(ds,{a}). + +make_ds(a) -> +%ssa% (K) when post_ssa_opt -> +%ssa% switch(K, Fail, [{a,IsA},{b,IsB}]), +%ssa% label IsB, +%ssa% ret({0,0}), +%ssa% label IsA, +%ssa% Rec = put_tuple(ds, 0), +%ssa% Tuple = put_tuple(0, 0, Rec), +%ssa% ret(Tuple). + {0,0,#ds{a=0}}; +make_ds(b) -> + {0,0}. + +%% Check that #ds{} is updated using update_record+inplace +work_ds([X|Rest], {A,B,C=#ds{a=F}}) -> +%ssa% (Ls, Acc) when post_ssa_opt -> +%ssa% Size = bif:tuple_size(Acc), +%ssa% switch(Size, Fail, [{2,_},{3,RecordLbl}]), +%ssa% label RecordLbl, +%ssa% Tuple = get_tuple_element(Acc, 2), +%ssa% _ = update_record(inplace, 2, Tuple, 2, _).
+ work_ds(Rest, {A,B,C#ds{a=F+X}}); +work_ds([X|Rest], {A,B}) -> + work_ds(Rest, {A+X,B+X+X}); +work_ds([], Acc) -> + Acc. + +different_sizes(L, K) -> + {work_ds(L, make_ds(K)), work_ds(L, make_ds(K))}. + +%% Check that both branches of ambiguous_make/1 are converted into +%% heap tuples. +-record(ar,{f}). + +make_int() -> + case e:f() of + X when is_integer(X) -> + X + end. + +ambiguous_make(a) -> +%ssa% (X) when post_ssa_opt -> +%ssa% IsB = bif:'=:='(X, b), +%ssa% br(IsB, BLbl, ALbl), +%ssa% label BLbl, +%ssa% R0 = put_tuple(...), +%ssa% ret(R0), +%ssa% label ALbl, +%ssa% R1 = put_tuple(...), +%ssa% ret(R1). + #ar{f=make_int()}; +ambiguous_make(b) -> + #ar{f=5}. + +ambiguous([X|Rest], R=#ar{f=F}) -> +%ssa% (_, R) when post_ssa_opt -> +%ssa% _ = update_record(inplace, 2, R, ...). + ambiguous(Rest, R#ar{f=F+X}); +ambiguous([], Acc) -> + Acc. + +ambiguous_inits(L) -> + X = ambiguous(L, ambiguous_make(a)), + Y = ambiguous(L, ambiguous_make(b)), + {X,Y}. + + +-record(r0, {not_aliased=0,aliased=[]}). + +update_record0() -> + update_record0(ex:f(), #r0{}). + +update_record0([Val|Ls], Acc=#r0{not_aliased=N}) -> +%ssa% (_, Rec) when post_ssa_opt -> +%ssa% _ = update_record(inplace, 3, Rec, 3, A, 2, NA) {unique => [Rec, NA], aliased => [A]}. + R = Acc#r0{not_aliased=N+1,aliased=Val}, + update_record0(Ls, R); +update_record0([], Acc) -> + Acc. + +%% Check that the reuse hint for update_record isn't used when the +%% result is used by an inplace update_record instruction. +-record(fc_r, {anno=#{}, + is, + last}). + +fc() -> + fc0(ex:f(), []). + +fc0([{L,#fc_r{}=Blk}|Bs], Acc0) -> +%ssa% (_, _) when post_ssa_opt -> +%ssa% _ = update_record(copy, 4, _, 3, _), +%ssa% _ = update_record(copy, 4, _, 3, _). + case ex:f() of + [Is] -> + Acc = fc0(Acc0), + fc0(Bs, [{L,Blk#fc_r{is=Is}}|Acc]); + Is -> + fc0(Bs, [{L,Blk#fc_r{is=Is}}|Acc0]) + end. + +fc0([{L,Blk}|Acc]) -> +%ssa% (_) when post_ssa_opt -> +%ssa% _ = update_record(inplace, 4, _, 3, _). + [{L,Blk#fc_r{is=x}}|Acc].
+ +-record(outer, {a,b}). +-record(inner, {c,d,e}). + +track_update_record(#outer{a=A}=Outer) -> +%ssa% (A0) when post_ssa_opt -> +%ssa% switch(X, _, [{0,Zero},{1,One},{2,Two},{3,Three},{4,Four}]), +%ssa% label Four, +%ssa% LitInner4 = put_tuple(inner, undefined, undefined, undefined), +%ssa% LitOuter40 = update_record(copy, 3, A0, 3, _, 2, LitInner4), +%ssa% LitOuter41 = update_record(inplace, 3, LitOuter40, 3, _), +%ssa% _ = call(fun track_update_record1/1, LitOuter41), +%ssa% label Three, +%ssa% LitInner0 = put_tuple(inner, undefined, undefined, undefined), +%ssa% LitOuter0 = update_record(copy, 3, A0, 3, _, 2, LitInner0), +%ssa% _ = call(fun track_update_record1/1, LitOuter0), +%ssa% label Two, +%ssa% LitInner1 = put_tuple(inner, c, undefined, undefined), +%ssa% LitOuter1 = put_tuple(outer, LitInner1, b), +%ssa% _ = call(fun track_update_record1/1, LitOuter1), +%ssa% label One, +%ssa% C = update_record(copy, 4, _, 2, _), +%ssa% D = update_record(copy, 3, A0, 3, _, 2, C), +%ssa% _ = call(fun track_update_record1/1, D), +%ssa% label Zero, +%ssa% A = update_record(copy, 4, _, 2, _), +%ssa% B = update_record(copy, 3, A0, 2, A), +%ssa% _ = call(fun track_update_record1/1, B). + C = e:f(), + case e:f() of + 0 -> + track_update_record1(Outer#outer{a=A#inner{c=C}}); + 1 -> + track_update_record1(Outer#outer{a=A#inner{c=C}, b=e:f()}); + 2 -> + track_update_record1(#outer{a=#inner{c=c}, b=b}); + 3 -> + track_update_record1(Outer#outer{a=#inner{},b=e:f()}); + 4 -> + Tmp0 = Outer#outer{a=#inner{},b=e:f()}, + Tmp = Tmp0#outer{b=e:f()}, + track_update_record1(Tmp) + end. + +%% Check that both the inner and the outer record are updated in place. +%% (Every call to track_update_record1/1 above passes a newly built record.) +track_update_record1(#outer{a=A}=Outer) -> +%ssa% (A) when post_ssa_opt -> +%ssa% B = update_record(inplace, 4, _, 3, _), +%ssa% R = update_record(inplace, 3, A, 2, B), +%ssa% ret(R). + B = e:f(), + Outer#outer{a=A#inner{d=B}}.
diff --git a/lib/compiler/test/bs_construct_SUITE.erl b/lib/compiler/test/bs_construct_SUITE.erl index bc6ed43dad41..f24074193726 100644 --- a/lib/compiler/test/bs_construct_SUITE.erl +++ b/lib/compiler/test/bs_construct_SUITE.erl @@ -765,8 +765,8 @@ private_append(_Config) -> ok. -%% GH-7121: Alias analysis would not mark fun arguments as aliased, fooling -%% the beam_ssa_private_append pass. +%% GH-7121: Alias analysis would not mark fun arguments as aliased, +%% fooling the beam_ssa_destructive_update pass. private_append_1(M) when is_map(M) -> maps:fold(fun (K, V, Acc = <<>>) -> <>; diff --git a/lib/stdlib/src/erl_parse.yrl b/lib/stdlib/src/erl_parse.yrl index 4420b176931c..61b939577268 100644 --- a/lib/stdlib/src/erl_parse.yrl +++ b/lib/stdlib/src/erl_parse.yrl @@ -684,6 +684,7 @@ ssa_check_list_lit_ls -> ssa_check_pat '|' ssa_check_pat : ['$1'|'$3']. ssa_check_map_key -> atom : '$1'. ssa_check_map_key -> integer : '$1'. ssa_check_map_key -> float : '$1'. +ssa_check_map_key -> var : '$1'. ssa_check_map_key -> '{' ssa_check_map_key_tuple_elements '}' : {tuple, ?anno('$1'), '$2'}. ssa_check_map_key -> '{' '}' : {tuple, ?anno('$1'), []}.