Skip to content

Commit 6d143ca

Browse files
aviatesk and Keno committed
wip: inlining: relax finalizer inlining control-flow restriction
Eager `finalizer` inlining (#45272) currently has a restriction that requires all the def/uses to be in the same basic block. This commit relaxes that restriction a bit by allowing def/uses to involve control flow when all of them are dominated by a `finalizer` call to be inlined, since in that case it is safe to insert the body of `finalizer` at the end of all the def/uses, e.g.

```julia
const FINALIZATION_COUNT = Ref(0)
init_finalization_count!() = FINALIZATION_COUNT[] = 0
get_finalization_count() = FINALIZATION_COUNT[]
@noinline add_finalization_count!(x) = FINALIZATION_COUNT[] += x
@noinline Base.@assume_effects :nothrow safeprint(io::IO, x...) = (@nospecialize; print(io, x...))
mutable struct DoAllocWithFieldInter
    x::Int
end
function register_finalizer!(obj::DoAllocWithFieldInter)
    finalizer(obj) do this
        add_finalization_count!(this.x)
    end
end

function cfg_finalization3(io)
    for i = -999:1000
        o = DoAllocWithFieldInter(i)
        register_finalizer!(o)
        if i == 1000
            safeprint(io, o.x, '\n')
        elseif i > 0
            safeprint(io, o.x)
        end
    end
end
let src = code_typed1(cfg_finalization3, (IO,))
    @test count(isinvoke(:add_finalization_count!), src.code) == 1
end
let
    init_finalization_count!()
    cfg_finalization3(IOBuffer())
    @test get_finalization_count() == 1000
end
```

To support this transformation, the domtree code also gains the ability to represent post-dominator trees, which is generally useful.

Co-authored-by: Keno Fischer <keno@juliacomputing.com>
1 parent 67f994c commit 6d143ca

File tree

5 files changed

+340
-71
lines changed

5 files changed

+340
-71
lines changed

base/compiler/ssair/domtree.jl

Lines changed: 104 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -109,10 +109,16 @@ end
109109

110110
length(D::DFSTree) = length(D.from_pre)
111111

112-
function DFS!(D::DFSTree, blocks::Vector{BasicBlock})
112+
function DFS!(D::DFSTree, blocks::Vector{BasicBlock}, is_post_dominator::Bool)
113113
copy!(D, DFSTree(length(blocks)))
114-
to_visit = Tuple{BBNumber, PreNumber, Bool}[(1, 0, false)]
115-
pre_num = 1
114+
if is_post_dominator
115+
# TODO: We're using -1 as the virtual exit node here. Would it make
116+
# sense to actually have a real BB for the exit always?
117+
to_visit = Tuple{BBNumber, PreNumber, Bool}[(-1, 0, false)]
118+
else
119+
to_visit = Tuple{BBNumber, PreNumber, Bool}[(1, 0, false)]
120+
end
121+
pre_num = is_post_dominator ? 0 : 1
116122
post_num = 1
117123
while !isempty(to_visit)
118124
# Because we want the postorder number as well as the preorder number,
@@ -123,28 +129,39 @@ function DFS!(D::DFSTree, blocks::Vector{BasicBlock})
123129
if pushed_children
124130
# Going up the DFS tree, so all we need to do is record the
125131
# postorder number, then move on
126-
D.to_post[current_node_bb] = post_num
127-
D.from_post[post_num] = current_node_bb
132+
if current_node_bb != -1
133+
D.to_post[current_node_bb] = post_num
134+
D.from_post[post_num] = current_node_bb
135+
end
128136
post_num += 1
129137
pop!(to_visit)
130138

131-
elseif D.to_pre[current_node_bb] != 0
139+
elseif current_node_bb != -1 && D.to_pre[current_node_bb] != 0
132140
# Node has already been visited, move on
133141
pop!(to_visit)
134142
continue
135143
else
136144
# Going down the DFS tree
137145

138146
# Record preorder number
139-
D.to_pre[current_node_bb] = pre_num
140-
D.from_pre[pre_num] = current_node_bb
141-
D.to_parent_pre[pre_num] = parent_pre
147+
if current_node_bb != -1
148+
D.to_pre[current_node_bb] = pre_num
149+
D.from_pre[pre_num] = current_node_bb
150+
D.to_parent_pre[pre_num] = parent_pre
151+
end
142152

143153
# Record that children (will) have been pushed
144154
to_visit[end] = (current_node_bb, parent_pre, true)
145155

156+
if is_post_dominator && current_node_bb == -1
157+
edges = Int[bb for bb in 1:length(blocks) if isempty(blocks[bb].succs)]
158+
else
159+
edges = is_post_dominator ? blocks[current_node_bb].preds :
160+
blocks[current_node_bb].succs
161+
end
162+
146163
# Push children to the stack
147-
for succ_bb in blocks[current_node_bb].succs
164+
for succ_bb in edges
148165
push!(to_visit, (succ_bb, pre_num, false))
149166
end
150167

@@ -161,7 +178,7 @@ function DFS!(D::DFSTree, blocks::Vector{BasicBlock})
161178
return D
162179
end
163180

164-
DFS(blocks::Vector{BasicBlock}) = DFS!(DFSTree(0), blocks)
181+
DFS(blocks::Vector{BasicBlock}, is_post_dominator::Bool=false) = DFS!(DFSTree(0), blocks, is_post_dominator)
165182

166183
"""
167184
Keeps the per-BB state of the Semi NCA algorithm. In the original formulation,
@@ -184,7 +201,7 @@ end
184201
DomTreeNode() = DomTreeNode(1, Vector{BBNumber}())
185202

186203
"Data structure that encodes which basic block dominates which."
187-
struct DomTree
204+
struct GenericDomTree{IsPostDom}
188205
# These can be reused when updating domtree dynamically
189206
dfs_tree::DFSTree
190207
snca_state::Vector{SNCAData}
@@ -195,19 +212,25 @@ struct DomTree
195212
# The nodes in the tree (ordered by BB indices)
196213
nodes::Vector{DomTreeNode}
197214
end
215+
const DomTree = GenericDomTree{false}
216+
const PostDomTree = GenericDomTree{true}
198217

199-
function DomTree()
200-
return DomTree(DFSTree(0), SNCAData[], BBNumber[], DomTreeNode[])
218+
function (T::Type{<:GenericDomTree})()
219+
return T(DFSTree(0), SNCAData[], BBNumber[], DomTreeNode[])
201220
end
202221

203222
function construct_domtree(blocks::Vector{BasicBlock})
204223
return update_domtree!(blocks, DomTree(), true, 0)
205224
end
206225

207-
function update_domtree!(blocks::Vector{BasicBlock}, domtree::DomTree,
208-
recompute_dfs::Bool, max_pre::PreNumber)
226+
function construct_postdomtree(blocks::Vector{BasicBlock})
227+
return update_domtree!(blocks, PostDomTree(), true, 0)
228+
end
229+
230+
function update_domtree!(blocks::Vector{BasicBlock}, domtree::GenericDomTree{IsPostDom},
231+
recompute_dfs::Bool, max_pre::PreNumber) where {IsPostDom}
209232
if recompute_dfs
210-
DFS!(domtree.dfs_tree, blocks)
233+
DFS!(domtree.dfs_tree, blocks, IsPostDom)
211234
end
212235

213236
if max_pre == 0
@@ -219,17 +242,24 @@ function update_domtree!(blocks::Vector{BasicBlock}, domtree::DomTree,
219242
return domtree
220243
end
221244

222-
function compute_domtree_nodes!(domtree::DomTree)
245+
function compute_domtree_nodes!(domtree::GenericDomTree{IsPostDom}) where {IsPostDom}
223246
# Compute children
224247
copy!(domtree.nodes,
225248
DomTreeNode[DomTreeNode() for _ in 1:length(domtree.idoms_bb)])
226249
for (idx, idom) in Iterators.enumerate(domtree.idoms_bb)
227-
(idx == 1 || idom == 0) && continue
250+
((!IsPostDom && idx == 1) || idom == 0) && continue
228251
push!(domtree.nodes[idom].children, idx)
229252
end
230253
# n.b. now issorted(domtree.nodes[*].children) since idx is sorted above
231254
# Recursively set level
232-
update_level!(domtree.nodes, 1, 1)
255+
if IsPostDom
256+
for (node, idom) in enumerate(domtree.idoms_bb)
257+
idom == 0 || continue
258+
update_level!(domtree.nodes, node, 1)
259+
end
260+
else
261+
update_level!(domtree.nodes, 1, 1)
262+
end
233263
return domtree.nodes
234264
end
235265

@@ -244,13 +274,18 @@ function update_level!(nodes::Vector{DomTreeNode}, node::BBNumber, level::Int)
244274
end
245275
end
246276

277+
dom_edges(domtree::DomTree, blocks::Vector{BasicBlock}, idx::BBNumber) =
278+
blocks[idx].preds
279+
dom_edges(domtree::PostDomTree, blocks::Vector{BasicBlock}, idx::BBNumber) =
280+
blocks[idx].succs
281+
247282
"""
248283
The main Semi-NCA algorithm. Matches Figure 2.8 in [LG05]. Note that the
249284
pseudocode in [LG05] is not entirely accurate. The best way to understand
250285
what's happening is to read [LT79], then the description of SLT in [LG05]
251286
(warning: inconsistent notation), then the description of Semi-NCA.
252287
"""
253-
function SNCA!(domtree::DomTree, blocks::Vector{BasicBlock}, max_pre::PreNumber)
288+
function SNCA!(domtree::GenericDomTree{IsPostDom}, blocks::Vector{BasicBlock}, max_pre::PreNumber) where {IsPostDom}
254289
D = domtree.dfs_tree
255290
state = domtree.snca_state
256291
# There may be more blocks than are reachable in the DFS / dominator tree
@@ -289,13 +324,14 @@ function SNCA!(domtree::DomTree, blocks::Vector{BasicBlock}, max_pre::PreNumber)
289324
# Calculate semidominators, but only for blocks with preorder number up to
290325
# max_pre
291326
ancestors = copy(D.to_parent_pre)
292-
for w::PreNumber in reverse(2:max_pre)
327+
relevant_blocks = IsPostDom ? (1:max_pre) : (2:max_pre)
328+
for w::PreNumber in reverse(relevant_blocks)
293329
# LLVM initializes this to the parent, the paper initializes this to
294330
# `w`, but it doesn't really matter (the parent is a predecessor, so at
295331
# worst we'll discover it below). Save a memory reference here.
296332
semi_w = typemax(PreNumber)
297333
last_linked = PreNumber(w + 1)
298-
for v ∈ blocks[D.from_pre[w]].preds
334+
for v ∈ dom_edges(domtree, blocks, D.from_pre[w])
299335
# For the purpose of the domtree, ignore virtual predecessors into
300336
# catch blocks.
301337
v == 0 && continue
@@ -331,7 +367,7 @@ function SNCA!(domtree::DomTree, blocks::Vector{BasicBlock}, max_pre::PreNumber)
331367
# ancestor in the (immediate) dominator tree between its semidominator and
332368
# its parent (see Lemma 2.6 in [LG05]).
333369
idoms_pre = copy(D.to_parent_pre)
334-
for v in 2:n_nodes
370+
for v in (IsPostDom ? (1:n_nodes) : (2:n_nodes))
335371
idom = idoms_pre[v]
336372
vsemi = state[v].semi
337373
while idom > vsemi
@@ -343,10 +379,11 @@ function SNCA!(domtree::DomTree, blocks::Vector{BasicBlock}, max_pre::PreNumber)
343379
# Express idoms in BB indexing
344380
resize!(domtree.idoms_bb, n_blocks)
345381
for i::BBNumber in 1:n_blocks
346-
if i == 1 || D.to_pre[i] == 0
382+
if (!IsPostDom && i == 1) || D.to_pre[i] == 0
347383
domtree.idoms_bb[i] = 0
348384
else
349-
domtree.idoms_bb[i] = D.from_pre[idoms_pre[D.to_pre[i]]]
385+
ip = idoms_pre[D.to_pre[i]]
386+
domtree.idoms_bb[i] = ip == 0 ? 0 : D.from_pre[ip]
350387
end
351388
end
352389
end
@@ -549,7 +586,12 @@ Checks if `bb1` dominates `bb2`.
549586
`bb1` dominates `bb2` if the only way to enter `bb2` is via `bb1`.
550587
(Other blocks may be in between, e.g `bb1->bbx->bb2`).
551588
"""
552-
function dominates(domtree::DomTree, bb1::BBNumber, bb2::BBNumber)
589+
dominates(domtree::DomTree, bb1::BBNumber, bb2::BBNumber) =
590+
_dominates(domtree, bb1, bb2)
591+
postdominates(domtree::PostDomTree, bb1::BBNumber, bb2::BBNumber) =
592+
_dominates(domtree, bb1, bb2)
593+
594+
function _dominates(domtree::GenericDomTree, bb1::BBNumber, bb2::BBNumber)
553595
bb1 == bb2 && return true
554596
target_level = domtree.nodes[bb1].level
555597
source_level = domtree.nodes[bb2].level
@@ -584,19 +626,48 @@ function iterate(doms::DominatedBlocks, state::Nothing=nothing)
584626
return (bb, nothing)
585627
end
586628

587-
function naive_idoms(blocks::Vector{BasicBlock})
629+
"""
630+
nearest_common_dominator(domtree::GenericDomTree, a::BBNumber, b::BBNumber)
631+
632+
Compute the nearest common (post-)dominator of `a` and `b`.
633+
"""
634+
function nearest_common_dominator(domtree::GenericDomTree, a::BBNumber, b::BBNumber)
635+
alevel = domtree.nodes[a].level
636+
blevel = domtree.nodes[b].level
637+
# W.l.g. assume blevel <= alevel
638+
if alevel < blevel
639+
a, b = b, a
640+
alevel, blevel = blevel, alevel
641+
end
642+
while alevel > blevel
643+
a = domtree.idoms_bb[a]
644+
alevel -= 1
645+
end
646+
while a != b && a != 0
647+
a = domtree.idoms_bb[a]
648+
b = domtree.idoms_bb[b]
649+
end
650+
@assert a == b
651+
return a
652+
end
653+
654+
function naive_idoms(blocks::Vector{BasicBlock}, is_post_dominator::Bool=false)
588655
nblocks = length(blocks)
589656
# The extra +1 helps us detect unreachable blocks below
590657
dom_all = BitSet(1:nblocks+1)
591-
dominators = BitSet[n == 1 ? BitSet(1) : copy(dom_all) for n = 1:nblocks]
658+
dominators = is_post_dominator ?
659+
BitSet[isempty(blocks[n].succs) ? BitSet(n) : copy(dom_all) for n = 1:nblocks] :
660+
BitSet[n == 1 ? BitSet(1) : copy(dom_all) for n = 1:nblocks]
592661
changed = true
662+
relevant_blocks = (is_post_dominator ? (1:nblocks) : (2:nblocks))
593663
while changed
594664
changed = false
595-
for n = 2:nblocks
596-
if isempty(blocks[n].preds)
665+
for n in relevant_blocks
666+
edges = is_post_dominator ? blocks[n].succs : blocks[n].preds
667+
if isempty(edges)
597668
continue
598669
end
599-
firstp, rest = Iterators.peel(Iterators.filter(p->p != 0, blocks[n].preds))::NTuple{2,Any}
670+
firstp, rest = Iterators.peel(Iterators.filter(p->p != 0, edges))::NTuple{2,Any}
600671
new_doms = copy(dominators[firstp])
601672
for p in rest
602673
intersect!(new_doms, dominators[p])
@@ -608,7 +679,7 @@ function naive_idoms(blocks::Vector{BasicBlock})
608679
end
609680
# Compute idoms
610681
idoms = fill(0, nblocks)
611-
for i = 2:nblocks
682+
for i in relevant_blocks
612683
if dominators[i] == dom_all
613684
idoms[i] = 0
614685
continue

0 commit comments

Comments
 (0)