Skip to content

Commit 310de1c

Browse files
authored
optimizer: enhance SROA, handle partially-initialized allocations (#42834)
During adding more test cases for our SROA pass, I found our SROA doesn't handle allocation sites with uninitialized fields at all. This commit is based on #42833 and tries to handle such "unsafe" allocations, if there are safe `setfield!` definitions. For example, this commit allows the allocation `r = Ref{Int}()` to be eliminated in the following example (adapted from <https://hackmd.io/bZz8k6SHQQuNUW-Vs7rqfw?view>): ```julia julia> code_typed() do r = Ref{Int}() r[] = 42 b = sin(r[]) return b end |> only ``` This commit comes with a plenty of basic test cases for our SROA pass also.
1 parent 71d57d9 commit 310de1c

File tree

3 files changed

+220
-38
lines changed

3 files changed

+220
-38
lines changed

base/compiler/optimize.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,7 @@ function run_passes(ci::CodeInfo, sv::OptimizationState)
325325
@timeit "Inlining" ir = ssa_inlining_pass!(ir, ir.linetable, sv.inlining, ci.propagate_inbounds)
326326
# @timeit "verify 2" verify_ir(ir)
327327
@timeit "compact 2" ir = compact!(ir)
328-
@timeit "SROA" ir = getfield_elim_pass!(ir)
328+
@timeit "SROA" ir = sroa_pass!(ir)
329329
@timeit "ADCE" ir = adce_pass!(ir)
330330
@timeit "type lift" ir = type_lift_pass!(ir)
331331
@timeit "compact 3" ir = compact!(ir)

base/compiler/ssair/passes.jl

Lines changed: 52 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,22 @@ function compute_value_for_block(ir::IRCode, domtree::DomTree, allblocks::Vector
7575
end
7676

7777
function compute_value_for_use(ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, use_idx::Int)
78-
# Find the first dominating def
78+
def, stmtblock, curblock = find_def_for_use(ir, domtree, allblocks, du, use_idx)
79+
if def == 0
80+
if !haskey(phinodes, curblock)
81+
# If this happens, we need to search the predecessors for defs. Which
82+
# one doesn't matter - if it did, we'd have had a phinode
83+
return compute_value_for_block(ir, domtree, allblocks, du, phinodes, fidx, first(ir.cfg.blocks[stmtblock].preds))
84+
end
85+
# The use is the phinode
86+
return phinodes[curblock]
87+
else
88+
return val_for_def_expr(ir, def, fidx)
89+
end
90+
end
91+
92+
# find the first dominating def for the given use
93+
function find_def_for_use(ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse, use_idx::Int)
7994
stmtblock = block_for_inst(ir.cfg, use_idx)
8095
curblock = find_curblock(domtree, allblocks, stmtblock)
8196
local def = 0
@@ -90,17 +105,7 @@ function compute_value_for_use(ir::IRCode, domtree::DomTree, allblocks::Vector{I
90105
end
91106
end
92107
end
93-
if def == 0
94-
if !haskey(phinodes, curblock)
95-
# If this happens, we need to search the predecessors for defs. Which
96-
# one doesn't matter - if it did, we'd have had a phinode
97-
return compute_value_for_block(ir, domtree, allblocks, du, phinodes, fidx, first(ir.cfg.blocks[stmtblock].preds))
98-
end
99-
# The use is the phinode
100-
return phinodes[curblock]
101-
else
102-
return val_for_def_expr(ir, def, fidx)
103-
end
108+
return def, stmtblock, curblock
104109
end
105110

106111
function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSAValue=#),
@@ -538,7 +543,7 @@ function perform_lifting!(compact::IncrementalCompact,
538543
end
539544

540545
"""
541-
getfield_elim_pass!(ir::IRCode) -> newir::IRCode
546+
sroa_pass!(ir::IRCode) -> newir::IRCode
542547
543548
`getfield` elimination pass, a.k.a. Scalar Replacements of Aggregates optimization.
544549
@@ -555,7 +560,7 @@ its argument).
555560
In a case when all usages are fully eliminated, `struct` allocation may also be erased as
556561
a result of dead code elimination.
557562
"""
558-
function getfield_elim_pass!(ir::IRCode)
563+
function sroa_pass!(ir::IRCode)
559564
compact = IncrementalCompact(ir)
560565
defuses = IdDict{Int, Tuple{IdSet{Int}, SSADefUse}}()
561566
lifting_cache = IdDict{Pair{AnySSAValue, Any}, AnySSAValue}()
@@ -784,7 +789,6 @@ function getfield_elim_pass!(ir::IRCode)
784789
typ = typ::DataType
785790
# Partition defuses by field
786791
fielddefuse = SSADefUse[SSADefUse() for _ = 1:fieldcount(typ)]
787-
ok = true
788792
for use in defuse.uses
789793
stmt = ir[SSAValue(use)]
790794
# We may have discovered above that this use is dead
@@ -793,47 +797,59 @@ function getfield_elim_pass!(ir::IRCode)
793797
# the use in that case.
794798
stmt === nothing && continue
795799
field = try_compute_fieldidx_stmt(compact, stmt::Expr, typ)
796-
field === nothing && (ok = false; break)
800+
field === nothing && @goto skip
797801
push!(fielddefuse[field].uses, use)
798802
end
799-
ok || continue
800803
for use in defuse.defs
801804
field = try_compute_fieldidx_stmt(compact, ir[SSAValue(use)]::Expr, typ)
802-
field === nothing && (ok = false; break)
805+
field === nothing && @goto skip
803806
push!(fielddefuse[field].defs, use)
804807
end
805-
ok || continue
806808
# Check that the defexpr has defined values for all the fields
807809
# we're accessing. In the future, we may want to relax this,
808810
# but we should come up with semantics for well defined semantics
809811
# for uninitialized fields first.
810-
for (fidx, du) in pairs(fielddefuse)
812+
ndefuse = length(fielddefuse)
813+
blocks = Vector{Tuple{#=phiblocks=# Vector{Int}, #=allblocks=# Vector{Int}}}(undef, ndefuse)
814+
for fidx in 1:ndefuse
815+
du = fielddefuse[fidx]
811816
isempty(du.uses) && continue
817+
push!(du.defs, idx)
818+
ldu = compute_live_ins(ir.cfg, du)
819+
phiblocks = Int[]
820+
if !isempty(ldu.live_in_bbs)
821+
phiblocks = idf(ir.cfg, ldu, domtree)
822+
end
823+
allblocks = sort(vcat(phiblocks, ldu.def_bbs))
824+
blocks[fidx] = phiblocks, allblocks
812825
if fidx + 1 > length(defexpr.args)
813-
ok = false
814-
break
826+
# even if the allocation contains an uninitialized field, we try an extra effort
827+
# to check if all uses have any "solid" `setfield!` calls that define the field
828+
# although we give up the cases below:
829+
# `def == idx`: this field can only defined at the allocation site (thus this case will throw)
830+
# `def == 0`: this field comes from `PhiNode`
831+
# we may be able to traverse control flows of PhiNode values, but it sounds
832+
# more complicated than beneficial under the current implementation
833+
for use in du.uses
834+
def = find_def_for_use(ir, domtree, allblocks, du, use)[1]
835+
(def == 0 || def == idx) && @goto skip
836+
end
815837
end
816838
end
817-
ok || continue
818839
preserve_uses = IdDict{Int, Vector{Any}}((idx=>Any[] for idx in IdSet{Int}(defuse.ccall_preserve_uses)))
819840
# Everything accounted for. Go field by field and perform idf
820-
for (fidx, du) in pairs(fielddefuse)
841+
for fidx in 1:ndefuse
842+
du = fielddefuse[fidx]
821843
ftyp = fieldtype(typ, fidx)
822844
if !isempty(du.uses)
823-
push!(du.defs, idx)
824-
ldu = compute_live_ins(ir.cfg, du)
825-
phiblocks = Int[]
826-
if !isempty(ldu.live_in_bbs)
827-
phiblocks = idf(ir.cfg, ldu, domtree)
828-
end
845+
phiblocks, allblocks = blocks[fidx]
829846
phinodes = IdDict{Int, SSAValue}()
830847
for b in phiblocks
831848
n = PhiNode()
832849
phinodes[b] = insert_node!(ir, first(ir.cfg.blocks[b].stmts),
833850
NewInstruction(n, ftyp))
834851
end
835852
# Now go through all uses and rewrite them
836-
allblocks = sort(vcat(phiblocks, ldu.def_bbs))
837853
for stmt in du.uses
838854
ir[SSAValue(stmt)] = compute_value_for_use(ir, domtree, allblocks, du, phinodes, fidx, stmt)
839855
end
@@ -855,7 +871,6 @@ function getfield_elim_pass!(ir::IRCode)
855871
stmt == idx && continue
856872
ir[SSAValue(stmt)] = nothing
857873
end
858-
continue
859874
end
860875
isempty(defuse.ccall_preserve_uses) && continue
861876
push!(intermediaries, idx)
@@ -870,6 +885,8 @@ function getfield_elim_pass!(ir::IRCode)
870885
old_preserves..., new_preserves...)
871886
ir[SSAValue(use)] = new_expr
872887
end
888+
889+
@label skip
873890
end
874891

875892
return ir
@@ -919,14 +936,14 @@ In addition to a simple DCE for unused values and allocations,
919936
this pass also nullifies `typeassert` calls that can be proved to be no-op,
920937
in order to allow LLVM to emit simpler code down the road.
921938
922-
Note that this pass is more effective after SROA optimization (i.e. `getfield_elim_pass!`),
939+
Note that this pass is more effective after SROA optimization (i.e. `sroa_pass!`),
923940
since SROA often allows this pass to:
924941
- eliminate allocation of object whose field references are all replaced with scalar values, and
925942
- nullify `typeassert` call whose first operand has been replaced with a scalar value
926943
(, which may have introduced new type information that inference did not understand)
927944
928-
Also note that currently this pass _needs_ to run after `getfield_elim_pass!`, because
929-
the `typeassert` elimination depends on the transformation within `getfield_elim_pass!`
945+
Also note that currently this pass _needs_ to run after `sroa_pass!`, because
946+
the `typeassert` elimination depends on the transformation within `sroa_pass!`
930947
which redirects references of `typeassert`ed value to the corresponding `PiNode`.
931948
"""
932949
function adce_pass!(ir::IRCode)

test/compiler/irpasses.jl

Lines changed: 167 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,171 @@ end
6969

7070
# Tests for SROA
7171

72+
import Core.Compiler: argextype, singleton_type
73+
const EMPTY_SPTYPES = Core.Compiler.EMPTY_SLOTTYPES
74+
75+
code_typed1(args...; kwargs...) = first(only(code_typed(args...; kwargs...)))::Core.CodeInfo
76+
get_code(args...; kwargs...) = code_typed1(args...; kwargs...).code
77+
78+
# check if `x` is a statement with a given `head`
79+
isnew(@nospecialize x) = Meta.isexpr(x, :new)
80+
81+
# check if `x` is a dynamic call of a given function
82+
iscall(y) = @nospecialize(x) -> iscall(y, x)
83+
function iscall((src, f)::Tuple{Core.CodeInfo,Function}, @nospecialize(x))
84+
return iscall(x) do @nospecialize x
85+
singleton_type(argextype(x, src, EMPTY_SPTYPES)) === f
86+
end
87+
end
88+
iscall(pred::Function, @nospecialize(x)) = Meta.isexpr(x, :call) && pred(x.args[1])
89+
90+
struct ImmutableXYZ; x; y; z; end
91+
mutable struct MutableXYZ; x; y; z; end
92+
93+
# should optimize away very basic cases
94+
let src = code_typed1((Any,Any,Any)) do x, y, z
95+
xyz = ImmutableXYZ(x, y, z)
96+
xyz.x, xyz.y, xyz.z
97+
end
98+
@test !any(isnew, src.code)
99+
end
100+
let src = code_typed1((Any,Any,Any)) do x, y, z
101+
xyz = MutableXYZ(x, y, z)
102+
xyz.x, xyz.y, xyz.z
103+
end
104+
@test !any(isnew, src.code)
105+
end
106+
107+
# should handle simple mutabilities
108+
let src = code_typed1((Any,Any,Any)) do x, y, z
109+
xyz = MutableXYZ(x, y, z)
110+
xyz.y = 42
111+
xyz.x, xyz.y, xyz.z
112+
end
113+
@test !any(isnew, src.code)
114+
@test any(src.code) do @nospecialize x
115+
iscall((src, tuple), x) &&
116+
x.args[2:end] == Any[#=x=# Core.Argument(2), 42, #=x=# Core.Argument(4)]
117+
end
118+
end
119+
let src = code_typed1((Any,Any,Any)) do x, y, z
120+
xyz = MutableXYZ(x, y, z)
121+
xyz.x, xyz.z = xyz.z, xyz.x
122+
xyz.x, xyz.y, xyz.z
123+
end
124+
@test !any(isnew, src.code)
125+
@test any(src.code) do @nospecialize x
126+
iscall((src, tuple), x) &&
127+
x.args[2:end] == Any[#=z=# Core.Argument(4), #=y=# Core.Argument(3), #=x=# Core.Argument(2)]
128+
end
129+
end
130+
# circumvent uninitialized fields as far as there is a solid `setfield!` definition
131+
let src = code_typed1() do
132+
r = Ref{Any}()
133+
r[] = 42
134+
return r[]
135+
end
136+
@test !any(isnew, src.code)
137+
end
138+
let src = code_typed1((Bool,)) do cond
139+
r = Ref{Any}()
140+
if cond
141+
r[] = 42
142+
return r[]
143+
else
144+
r[] = 32
145+
return r[]
146+
end
147+
end
148+
@test !any(isnew, src.code)
149+
end
150+
# FIXME to handle this case, we need a more strong alias analysis
151+
let src = code_typed1((Bool,)) do cond
152+
r = Ref{Any}()
153+
if cond
154+
r[] = 42
155+
else
156+
r[] = 32
157+
end
158+
return r[]
159+
end
160+
@test_broken !any(isnew, src.code)
161+
end
162+
let src = code_typed1((Bool,)) do cond
163+
r = Ref{Any}()
164+
if cond
165+
r[] = 42
166+
end
167+
return r[]
168+
end
169+
# N.B. `r` should be allocated since `cond` might be `false` and then it will be thrown
170+
@test any(isnew, src.code)
171+
end
172+
173+
# should include a simple alias analysis
174+
struct ImmutableOuter{T}; x::T; y::T; z::T; end
175+
mutable struct MutableOuter{T}; x::T; y::T; z::T; end
176+
let src = code_typed1((Any,Any,Any)) do x, y, z
177+
xyz = ImmutableXYZ(x, y, z)
178+
outer = ImmutableOuter(xyz, xyz, xyz)
179+
outer.x.x, outer.y.y, outer.z.z
180+
end
181+
@test !any(src.code) do @nospecialize x
182+
Meta.isexpr(x, :new)
183+
end
184+
@test any(src.code) do @nospecialize x
185+
iscall((src, tuple), x) &&
186+
x.args[2:end] == Any[#=x=# Core.Argument(2), #=y=# Core.Argument(3), #=y=# Core.Argument(4)]
187+
end
188+
end
189+
let src = code_typed1((Any,Any,Any)) do x, y, z
190+
xyz = ImmutableXYZ(x, y, z)
191+
# #42831 forms ::PartialStruct(ImmutableOuter{Any}, Any[ImmutableXYZ, ImmutableXYZ, ImmutableXYZ])
192+
# so the succeeding `getproperty`s are type stable and inlined
193+
outer = ImmutableOuter{Any}(xyz, xyz, xyz)
194+
outer.x.x, outer.y.y, outer.z.z
195+
end
196+
@test !any(isnew, src.code)
197+
@test any(src.code) do @nospecialize x
198+
iscall((src, tuple), x) &&
199+
x.args[2:end] == Any[#=x=# Core.Argument(2), #=y=# Core.Argument(3), #=y=# Core.Argument(4)]
200+
end
201+
end
202+
# FIXME our analysis isn't yet so powerful at this moment, e.g. it can't handle nested mutable objects
203+
let src = code_typed1((Any,Any,Any)) do x, y, z
204+
xyz = MutableXYZ(x, y, z)
205+
outer = ImmutableOuter(xyz, xyz, xyz)
206+
outer.x.x, outer.y.y, outer.z.z
207+
end
208+
@test_broken !any(isnew, src.code)
209+
end
210+
let src = code_typed1((Any,Any,Any)) do x, y, z
211+
xyz = ImmutableXYZ(x, y, z)
212+
outer = MutableOuter(xyz, xyz, xyz)
213+
outer.x.x, outer.y.y, outer.z.z
214+
end
215+
@test_broken !any(isnew, src.code)
216+
end
217+
218+
# should work nicely with inlining to optimize away a complicated case
219+
# adapted from http://wiki.luajit.org/Allocation-Sinking-Optimization#implementation%5B
220+
struct Point
221+
x::Float64
222+
y::Float64
223+
end
224+
#=@inline=# add(a::Point, b::Point) = Point(a.x + b.x, a.y + b.y)
225+
function compute()
226+
a = Point(1.5, 2.5)
227+
b = Point(2.25, 4.75)
228+
for i in 0:(100000000-1)
229+
a = add(add(a, b), b)
230+
end
231+
a.x, a.y
232+
end
233+
let src = code_typed1(compute)
234+
@test !any(isnew, src.code)
235+
end
236+
72237
mutable struct Foo30594; x::Float64; end
73238
Base.copy(x::Foo30594) = Foo30594(x.x)
74239
function add!(p::Foo30594, off::Foo30594)
@@ -180,7 +345,7 @@ let m = Meta.@lower 1 + 1
180345
src.ssaflags = fill(Int32(0), nstmts)
181346
ir = Core.Compiler.inflate_ir(src, Any[], Any[Any, Any])
182347
@test Core.Compiler.verify_ir(ir) === nothing
183-
ir = @test_nowarn Core.Compiler.getfield_elim_pass!(ir)
348+
ir = @test_nowarn Core.Compiler.sroa_pass!(ir)
184349
@test Core.Compiler.verify_ir(ir) === nothing
185350
end
186351

@@ -384,7 +549,7 @@ exc39508 = ErrorException("expected")
384549
end
385550
@test test39508() === exc39508
386551

387-
let # `getfield_elim_pass!` should work with constant globals
552+
let # `sroa_pass!` should work with constant globals
388553
# immutable pass
389554
src = @eval Module() begin
390555
const REF_FLD = :x

0 commit comments

Comments
 (0)