Skip to content

Commit d76f2a4

Browse files
committed
optimizer: inline abstract union-split callsite
Currently the optimizer handles an abstract callsite only when there is a single dispatch candidate (in most cases), and so inlining and static-dispatch are prohibited when the callsite is union-split (in other words, union-split happens only when all the dispatch candidates are concrete). However, there are certain patterns of code (most notably our Julia-level compiler code) that inherently need to deal with abstract callsites. The following example is taken from a `Core.Compiler` utility: ```julia julia> @inline isType(@nospecialize t) = isa(t, DataType) && t.name === Type.body.name isType (generic function with 1 method) julia> code_typed((Any,)) do x # abstract, but no union-split, successful inlining isType(x) end |> only CodeInfo( 1 ─ %1 = (x isa Main.DataType)::Bool └── goto #3 if not %1 2 ─ %3 = π (x, DataType) │ %4 = Base.getfield(%3, :name)::Core.TypeName │ %5 = Base.getfield(Type{T}, :name)::Core.TypeName │ %6 = (%4 === %5)::Bool └── goto #4 3 ─ goto #4 4 ┄ %9 = φ (#2 => %6, #3 => false)::Bool └── return %9 ) => Bool julia> code_typed((Union{Type,Nothing},)) do x # abstract, union-split, unsuccessful inlining isType(x) end |> only CodeInfo( 1 ─ %1 = (isa)(x, Nothing)::Bool └── goto #3 if not %1 2 ─ goto #4 3 ─ %4 = Main.isType(x)::Bool └── goto #4 4 ┄ %6 = φ (#2 => false, #3 => %4)::Bool └── return %6 ) => Bool ``` (note that this is a limitation of the inlining algorithm, and so user-provided hints such as callsite inlining annotations don't help here) This commit enables inlining and static dispatch for abstract union-split callsites. 
The core idea here is that we can simulate our dispatch semantics by generating `isa` checks in order of the specificity of dispatch candidates: ```julia julia> code_typed((Union{Type,Nothing},)) do x # abstract, union-split, now successful inlining isType(x) end |> only CodeInfo( 1 ─ %1 = (isa)(x, Nothing)::Bool └── goto #3 if not %1 2 ─ goto #9 3 ─ %4 = (isa)(x, Type)::Bool └── goto #8 if not %4 4 ─ %6 = π (x, Type) │ %7 = (%6 isa Main.DataType)::Bool └── goto #6 if not %7 5 ─ %9 = π (%6, DataType) │ %10 = Base.getfield(%9, :name)::Core.TypeName │ %11 = Base.getfield(Type{T}, :name)::Core.TypeName │ %12 = (%10 === %11)::Bool └── goto #7 6 ─ goto #7 7 ┄ %15 = φ (#5 => %12, #6 => false)::Bool └── goto #9 8 ─ Core.throw(ErrorException("fatal error in type inference (type bound)"))::Union{} └── unreachable 9 ┄ %19 = φ (#2 => false, #7 => %15)::Bool └── return %19 ) => Bool ``` Inlining/static-dispatch of abstract union-split callsites will improve the performance in such situations (and so this commit will improve the latency of our JIT compilation). Especially, this commit helps us avoid excessive specializations of `Core.Compiler` code by statically-resolving `@nospecialize`d callsites, and as a result, the # of precompiled statements is now reduced from `1956` ([`master`](dc45d77)) to `1901` (this commit). And also, as a side effect, the implementation of our inlining algorithm is now much simpler since we no longer need the previous special handling for abstract callsites. One possible drawback would be increased code size. 
This change does increase the size of the sysimage, but I think these numbers are in an acceptable range: > [`master`](dc45d77) ``` ❯ du -sh usr/lib/julia/* 17M usr/lib/julia/corecompiler.ji 188M usr/lib/julia/sys-o.a 164M usr/lib/julia/sys.dylib 23M usr/lib/julia/sys.dylib.dSYM 101M usr/lib/julia/sys.ji ``` > this commit ``` ❯ du -sh usr/lib/julia/* 17M usr/lib/julia/corecompiler.ji 190M usr/lib/julia/sys-o.a 166M usr/lib/julia/sys.dylib 23M usr/lib/julia/sys.dylib.dSYM 102M usr/lib/julia/sys.ji ```
1 parent cb2fa5d commit d76f2a4

File tree

3 files changed

+120
-90
lines changed

3 files changed

+120
-90
lines changed

base/compiler/ssair/inlining.jl

Lines changed: 45 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ function cfg_inline_unionsplit!(ir::IRCode, idx::Int,
241241
push!(from_bbs, length(state.new_cfg_blocks))
242242
# TODO: Right now we unconditionally generate a fallback block
243243
# in case of subtyping errors - This is probably unnecessary.
244-
if i != length(cases) || (!fully_covered || (!params.trust_inference && isdispatchtuple(cases[i].sig)))
244+
if i != length(cases) || (!fully_covered || (!params.trust_inference))
245245
# This block will have the next condition or the final else case
246246
push!(state.new_cfg_blocks, BasicBlock(StmtRange(idx, idx)))
247247
push!(state.new_cfg_blocks[cond_bb].succs, length(state.new_cfg_blocks))
@@ -313,7 +313,6 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
313313
spec = item.spec::ResolvedInliningSpec
314314
sparam_vals = item.mi.sparam_vals
315315
def = item.mi.def::Method
316-
inline_cfg = spec.ir.cfg
317316
linetable_offset::Int32 = length(linetable)
318317
# Append the linetable of the inlined function to our line table
319318
inlined_at = Int(compact.result[idx][:line])
@@ -471,17 +470,17 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int,
471470
join_bb = bbs[end]
472471
pn = PhiNode()
473472
local bb = compact.active_result_bb
474-
@assert length(bbs) >= length(cases)
475-
for i in 1:length(cases)
473+
ncases = length(cases)
474+
@assert length(bbs) >= ncases
475+
for i = 1:ncases
476476
ithcase = cases[i]
477477
mtype = ithcase.sig::DataType # checked within `handle_cases!`
478478
case = ithcase.item
479479
next_cond_bb = bbs[i]
480480
cond = true
481481
nparams = fieldcount(atype)
482482
@assert nparams == fieldcount(mtype)
483-
if i != length(cases) || !fully_covered ||
484-
(!params.trust_inference && isdispatchtuple(cases[i].sig))
483+
if i != ncases || !fully_covered || !params.trust_inference
485484
for i = 1:nparams
486485
a, m = fieldtype(atype, i), fieldtype(mtype, i)
487486
# If this is always true, we don't need to check for it
@@ -538,7 +537,7 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int,
538537
bb += 1
539538
# We're now in the fall through block, decide what to do
540539
if fully_covered
541-
if !params.trust_inference && isdispatchtuple(cases[end].sig)
540+
if !params.trust_inference
542541
e = Expr(:call, GlobalRef(Core, :throw), FATAL_TYPE_BOUND_ERROR)
543542
insert_node_here!(compact, NewInstruction(e, Union{}, line))
544543
insert_node_here!(compact, NewInstruction(ReturnNode(), Union{}, line))
@@ -561,7 +560,7 @@ function batch_inline!(todo::Vector{Pair{Int, Any}}, ir::IRCode, linetable::Vect
561560
state = CFGInliningState(ir)
562561
for (idx, item) in todo
563562
if isa(item, UnionSplit)
564-
cfg_inline_unionsplit!(ir, idx, item::UnionSplit, state, params)
563+
cfg_inline_unionsplit!(ir, idx, item, state, params)
565564
else
566565
item = item::InliningTodo
567566
spec = item.spec::ResolvedInliningSpec
@@ -1175,12 +1174,8 @@ function analyze_single_call!(
11751174
sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}})
11761175
argtypes = sig.argtypes
11771176
cases = InliningCase[]
1178-
local only_method = nothing # keep track of whether there is one matching method
1179-
local meth::MethodLookupResult
1177+
local any_fully_covered = false
11801178
local handled_all_cases = true
1181-
local any_covers_full = false
1182-
local revisit_idx = nothing
1183-
11841179
for i in 1:length(infos)
11851180
meth = infos[i].results
11861181
if meth.ambig
@@ -1191,66 +1186,20 @@ function analyze_single_call!(
11911186
# No applicable methods; try next union split
11921187
handled_all_cases = false
11931188
continue
1194-
else
1195-
if length(meth) == 1 && only_method !== false
1196-
if only_method === nothing
1197-
only_method = meth[1].method
1198-
elseif only_method !== meth[1].method
1199-
only_method = false
1200-
end
1201-
else
1202-
only_method = false
1203-
end
12041189
end
1205-
for (j, match) in enumerate(meth)
1206-
any_covers_full |= match.fully_covers
1207-
if !isdispatchtuple(match.spec_types)
1208-
if !match.fully_covers
1209-
handled_all_cases = false
1210-
continue
1211-
end
1212-
if revisit_idx === nothing
1213-
revisit_idx = (i, j)
1214-
else
1215-
handled_all_cases = false
1216-
revisit_idx = nothing
1217-
end
1218-
else
1219-
handled_all_cases &= handle_match!(match, argtypes, flag, state, cases)
1220-
end
1190+
for match in meth
1191+
handled_all_cases &= handle_match!(match, argtypes, flag, state, cases, true)
1192+
any_fully_covered |= match.fully_covers
12211193
end
12221194
end
12231195

1224-
atype = argtypes_to_type(argtypes)
1225-
if handled_all_cases && revisit_idx !== nothing
1226-
# If there's only one case that's not a dispatchtuple, we can
1227-
# still unionsplit by visiting all the other cases first.
1228-
# This is useful for code like:
1229-
# foo(x::Int) = 1
1230-
# foo(@nospecialize(x::Any)) = 2
1231-
# where we where only a small number of specific dispatchable
1232-
# cases are split off from an ::Any typed fallback.
1233-
(i, j) = revisit_idx
1234-
match = infos[i].results[j]
1235-
handled_all_cases &= handle_match!(match, argtypes, flag, state, cases, true)
1236-
elseif length(cases) == 0 && only_method isa Method
1237-
# if the signature is fully covered and there is only one applicable method,
1238-
# we can try to inline it even if the signature is not a dispatch tuple.
1239-
# -- But don't try it if we already tried to handle the match in the revisit_idx
1240-
# case, because that'll (necessarily) be the same method.
1241-
if length(infos) > 1
1242-
(metharg, methsp) = ccall(:jl_type_intersection_with_env, Any, (Any, Any),
1243-
atype, only_method.sig)::SimpleVector
1244-
match = MethodMatch(metharg, methsp::SimpleVector, only_method, true)
1245-
else
1246-
@assert length(meth) == 1
1247-
match = meth[1]
1248-
end
1249-
handle_match!(match, argtypes, flag, state, cases, true) || return nothing
1250-
any_covers_full = handled_all_cases = match.fully_covers
1196+
if !handled_all_cases
1197+
# if we've not seen all candidates, union split is valid only for dispatch tuples
1198+
filter!(case::InliningCase->isdispatchtuple(case.sig), cases)
12511199
end
12521200

1253-
handle_cases!(ir, idx, stmt, atype, cases, any_covers_full && handled_all_cases, todo, state.params)
1201+
handle_cases!(ir, idx, stmt, argtypes_to_type(argtypes), cases,
1202+
handled_all_cases & any_fully_covered, todo, state.params)
12541203
end
12551204

12561205
# similar to `analyze_single_call!`, but with constant results
@@ -1261,8 +1210,8 @@ function handle_const_call!(
12611210
(; call, results) = cinfo
12621211
infos = isa(call, MethodMatchInfo) ? MethodMatchInfo[call] : call.matches
12631212
cases = InliningCase[]
1213+
local any_fully_covered = false
12641214
local handled_all_cases = true
1265-
local any_covers_full = false
12661215
local j = 0
12671216
for i in 1:length(infos)
12681217
meth = infos[i].results
@@ -1278,32 +1227,26 @@ function handle_const_call!(
12781227
for match in meth
12791228
j += 1
12801229
result = results[j]
1281-
any_covers_full |= match.fully_covers
1230+
any_fully_covered |= match.fully_covers
12821231
if isa(result, ConstResult)
12831232
case = const_result_item(result, state)
12841233
push!(cases, InliningCase(result.mi.specTypes, case))
12851234
elseif isa(result, InferenceResult)
1286-
handled_all_cases &= handle_inf_result!(result, argtypes, flag, state, cases)
1235+
handled_all_cases &= handle_inf_result!(result, argtypes, flag, state, cases, true)
12871236
else
12881237
@assert result === nothing
1289-
handled_all_cases &= handle_match!(match, argtypes, flag, state, cases)
1238+
handled_all_cases &= handle_match!(match, argtypes, flag, state, cases, true)
12901239
end
12911240
end
12921241
end
12931242

1294-
# if the signature is fully covered and there is only one applicable method,
1295-
# we can try to inline it even if the signature is not a dispatch tuple
1296-
atype = argtypes_to_type(argtypes)
1297-
if length(cases) == 0
1298-
length(results) == 1 || return nothing
1299-
result = results[1]
1300-
isa(result, InferenceResult) || return nothing
1301-
handle_inf_result!(result, argtypes, flag, state, cases, true) || return nothing
1302-
spec_types = cases[1].sig
1303-
any_covers_full = handled_all_cases = atype <: spec_types
1243+
if !handled_all_cases
1244+
# if we've not seen all candidates, union split is valid only for dispatch tuples
1245+
filter!(case::InliningCase->isdispatchtuple(case.sig), cases)
13041246
end
13051247

1306-
handle_cases!(ir, idx, stmt, atype, cases, any_covers_full && handled_all_cases, todo, state.params)
1248+
handle_cases!(ir, idx, stmt, argtypes_to_type(argtypes), cases,
1249+
handled_all_cases & any_fully_covered, todo, state.params)
13071250
end
13081251

13091252
function handle_match!(
@@ -1313,7 +1256,6 @@ function handle_match!(
13131256
allow_abstract || isdispatchtuple(spec_types) || return false
13141257
item = analyze_method!(match, argtypes, flag, state)
13151258
item === nothing && return false
1316-
_any(case->case.sig === spec_types, cases) && return true
13171259
push!(cases, InliningCase(spec_types, item))
13181260
return true
13191261
end
@@ -1349,7 +1291,24 @@ function handle_cases!(ir::IRCode, idx::Int, stmt::Expr, @nospecialize(atype),
13491291
handle_single_case!(ir, idx, stmt, cases[1].item, todo, params)
13501292
elseif length(cases) > 0
13511293
isa(atype, DataType) || return nothing
1352-
all(case::InliningCase->isa(case.sig, DataType), cases) || return nothing
1294+
# `ir_inline_unionsplit!` is going to generate `isa` checks corresponding to the
1295+
# signatures of union-split dispatch candidates in order to simulate the dispatch
1296+
# semantics, and inline their bodies into each `isa`-conditional block -- and since
1297+
# we may deal with abstract union-split callsites here, these dispatch candidates
1298+
# need to be sorted in order of their signature specificity.
1299+
# Fortunately, ml_matches already sorted them in that way, so we can just process
1300+
# them in order, as far as we haven't changed their order somewhere up to this point.
1301+
ncases = length(cases)
1302+
for i = 1:ncases
1303+
sigᵢ = cases[i].sig
1304+
isa(sigᵢ, DataType) || return nothing
1305+
for j = i+1:ncases
1306+
sigⱼ = cases[j].sig
1307+
# since we already bail out from ambiguous case, we can use `morespecific` as
1308+
# a strict total order of specificity (in a case when they don't have a type intersection)
1309+
!hasintersect(sigᵢ, sigⱼ) || morespecific(sigᵢ, sigⱼ) || return nothing
1310+
end
1311+
end
13531312
push!(todo, idx=>UnionSplit(fully_covered, atype, cases))
13541313
end
13551314
return nothing
@@ -1445,7 +1404,8 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState)
14451404

14461405
analyze_single_call!(ir, idx, stmt, infos, flag, sig, state, todo)
14471406
end
1448-
todo
1407+
1408+
return todo
14491409
end
14501410

14511411
function linear_inline_eligible(ir::IRCode)

base/sort.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ module Sort
55
import ..@__MODULE__, ..parentmodule
66
const Base = parentmodule(@__MODULE__)
77
using .Base.Order
8-
using .Base: copymutable, LinearIndices, length, (:),
8+
using .Base: copymutable, LinearIndices, length, (:), iterate,
99
eachindex, axes, first, last, similar, zip, OrdinalRange,
1010
AbstractVector, @inbounds, AbstractRange, @eval, @inline, Vector, @noinline,
1111
AbstractMatrix, AbstractUnitRange, isless, identity, eltype, >, <, <=, >=, |, +, -, *, !,

test/compiler/inline.jl

Lines changed: 74 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -810,6 +810,76 @@ let
810810
@test invoke(Any[10]) === false
811811
end
812812

813+
# test union-split, non-dispatchtuple callsite inlining
814+
815+
@constprop :none @noinline abstract_unionsplit(@nospecialize x::Any) = Base.inferencebarrier(:Any)
816+
@constprop :none @noinline abstract_unionsplit(@nospecialize x::Number) = Base.inferencebarrier(:Number)
817+
let src = code_typed1((Any,)) do x
818+
abstract_unionsplit(x)
819+
end
820+
@test count(isinvoke(:abstract_unionsplit), src.code) == 2
821+
@test count(iscall((src, abstract_unionsplit)), src.code) == 0 # no fallback dispatch
822+
end
823+
let src = code_typed1((Union{Type,Number},)) do x
824+
abstract_unionsplit(x)
825+
end
826+
@test count(isinvoke(:abstract_unionsplit), src.code) == 2
827+
@test count(iscall((src, abstract_unionsplit)), src.code) == 0 # no fallback dispatch
828+
end
829+
830+
@constprop :none @noinline abstract_unionsplit_fallback(@nospecialize x::Type) = Base.inferencebarrier(:Any)
831+
@constprop :none @noinline abstract_unionsplit_fallback(@nospecialize x::Number) = Base.inferencebarrier(:Number)
832+
let src = code_typed1((Any,)) do x
833+
abstract_unionsplit_fallback(x)
834+
end
835+
@test count(isinvoke(:abstract_unionsplit_fallback), src.code) == 2
836+
@test count(iscall((src, abstract_unionsplit_fallback)), src.code) == 1 # fallback dispatch
837+
end
838+
let src = code_typed1((Union{Type,Number},)) do x
839+
abstract_unionsplit_fallback(x)
840+
end
841+
@test count(isinvoke(:abstract_unionsplit_fallback), src.code) == 2
842+
@test count(iscall((src, abstract_unionsplit)), src.code) == 0 # no fallback dispatch
843+
end
844+
845+
@constprop :aggressive @inline abstract_unionsplit(c, @nospecialize x::Any) = (c && println("erase me"); typeof(x))
846+
@constprop :aggressive @inline abstract_unionsplit(c, @nospecialize x::Number) = (c && println("erase me"); typeof(x))
847+
let src = code_typed1((Any,)) do x
848+
abstract_unionsplit(false, x)
849+
end
850+
@test count(iscall((src, typeof)), src.code) == 2
851+
@test count(isinvoke(:println), src.code) == 0
852+
@test count(iscall((src, println)), src.code) == 0
853+
@test count(iscall((src, abstract_unionsplit)), src.code) == 0 # no fallback dispatch
854+
end
855+
let src = code_typed1((Union{Type,Number},)) do x
856+
abstract_unionsplit(false, x)
857+
end
858+
@test count(iscall((src, typeof)), src.code) == 2
859+
@test count(isinvoke(:println), src.code) == 0
860+
@test count(iscall((src, println)), src.code) == 0
861+
@test count(iscall((src, abstract_unionsplit)), src.code) == 0 # no fallback dispatch
862+
end
863+
864+
@constprop :aggressive @inline abstract_unionsplit_fallback(c, @nospecialize x::Type) = (c && println("erase me"); typeof(x))
865+
@constprop :aggressive @inline abstract_unionsplit_fallback(c, @nospecialize x::Number) = (c && println("erase me"); typeof(x))
866+
let src = code_typed1((Any,)) do x
867+
abstract_unionsplit_fallback(false, x)
868+
end
869+
@test count(iscall((src, typeof)), src.code) == 2
870+
@test count(isinvoke(:println), src.code) == 0
871+
@test count(iscall((src, println)), src.code) == 0
872+
@test count(iscall((src, abstract_unionsplit_fallback)), src.code) == 1 # fallback dispatch
873+
end
874+
let src = code_typed1((Union{Type,Number},)) do x
875+
abstract_unionsplit_fallback(false, x)
876+
end
877+
@test count(iscall((src, typeof)), src.code) == 2
878+
@test count(isinvoke(:println), src.code) == 0
879+
@test count(iscall((src, println)), src.code) == 0
880+
@test count(iscall((src, abstract_unionsplit)), src.code) == 0 # no fallback dispatch
881+
end
882+
813883
# issue 43104
814884

815885
@inline isGoodType(@nospecialize x::Type) =
@@ -1090,11 +1160,11 @@ end
10901160

10911161
global x44200::Int = 0
10921162
function f44200()
1093-
global x = 0
1094-
while x < 10
1095-
x += 1
1163+
global x44200 = 0
1164+
while x44200 < 10
1165+
x44200 += 1
10961166
end
1097-
x
1167+
x44200
10981168
end
10991169
let src = code_typed1(f44200)
11001170
@test count(x -> isa(x, Core.PiNode), src.code) == 0

0 commit comments

Comments
 (0)