@@ -7,6 +7,7 @@ use super::Stats;
7
7
use crate :: dominator_tree:: DominatorTree ;
8
8
use crate :: fx:: { FxHashMap , FxHashSet } ;
9
9
use crate :: hash_map:: Entry as HashEntry ;
10
+ use crate :: inst_predicates:: is_pure_for_egraph;
10
11
use crate :: ir:: { Block , Function , Inst , Value , ValueDef } ;
11
12
use crate :: loop_analysis:: { Loop , LoopAnalysis } ;
12
13
use crate :: scoped_hash_map:: ScopedHashMap ;
@@ -216,46 +217,112 @@ impl<'a> Elaborator<'a> {
216
217
217
218
fn compute_best_values ( & mut self ) {
218
219
let best = & mut self . value_to_best_value ;
219
- for ( value, def) in self . func . dfg . values_and_defs ( ) {
220
- trace ! ( "computing best for value {:?} def {:?}" , value, def) ;
221
- match def {
222
- ValueDef :: Union ( x, y) => {
223
- // Pick the best of the two options based on
224
- // min-cost. This works because each element of `best`
225
- // is a `(cost, value)` tuple; `cost` comes first so
226
- // the natural comparison works based on cost, and
227
- // breaks ties based on value number.
228
- trace ! ( " -> best of {:?} and {:?}" , best[ x] , best[ y] ) ;
229
- best[ value] = std:: cmp:: min ( best[ x] , best[ y] ) ;
230
- trace ! ( " -> {:?}" , best[ value] ) ;
231
- }
232
- ValueDef :: Param ( _, _) => {
233
- best[ value] = BestEntry ( Cost :: zero ( ) , value) ;
234
- }
235
- // If the Inst is inserted into the layout (which is,
236
- // at this point, only the side-effecting skeleton),
237
- // then it must be computed and thus we give it zero
238
- // cost.
239
- ValueDef :: Result ( inst, _) => {
240
- if let Some ( _) = self . func . layout . inst_block ( inst) {
241
- best[ value] = BestEntry ( Cost :: zero ( ) , value) ;
242
- } else {
243
- trace ! ( " -> value {}: result, computing cost" , value) ;
244
- let inst_data = & self . func . dfg . insts [ inst] ;
245
- // N.B.: at this point we know that the opcode is
246
- // pure, so `pure_op_cost`'s precondition is
247
- // satisfied.
248
- let cost = Cost :: of_pure_op (
249
- inst_data. opcode ( ) ,
250
- self . func . dfg . inst_values ( inst) . map ( |value| best[ value] . 0 ) ,
220
+
221
+ // Do a fixpoint loop to compute the best value for each eclass.
222
+ //
223
+ // The maximum number of iterations is the length of the longest chain
224
+ // of `vNN -> vMM` edges in the dataflow graph where `NN < MM`, so this
225
+ // is *technically* quadratic, but `cranelift-frontend` won't construct
226
+ // any such edges. NaN canonicalization will introduce some of these
227
+ // edges, but they are chains of only two or three edges. So in
228
+ // practice, we *never* do more than a handful of iterations here unless
229
+ // (a) we parsed the CLIF from text and the text was funkily numbered,
230
+ // which we don't really care about, or (b) the CLIF producer did
231
+ // something weird, in which case it is their responsibility to stop
232
+ // doing that.
233
+ trace ! ( "Entering fixpoint loop to compute the best values for each eclass" ) ;
234
+ let mut keep_going = true ;
235
+ while keep_going {
236
+ keep_going = false ;
237
+ trace ! (
238
+ "fixpoint iteration {}" ,
239
+ self . stats. elaborate_best_cost_fixpoint_iters
240
+ ) ;
241
+ self . stats . elaborate_best_cost_fixpoint_iters += 1 ;
242
+
243
+ for ( value, def) in self . func . dfg . values_and_defs ( ) {
244
+ trace ! ( "computing best for value {:?} def {:?}" , value, def) ;
245
+ let orig_best_value = best[ value] ;
246
+
247
+ match def {
248
+ ValueDef :: Union ( x, y) => {
249
+ // Pick the best of the two options based on
250
+ // min-cost. This works because each element of `best`
251
+ // is a `(cost, value)` tuple; `cost` comes first so
252
+ // the natural comparison works based on cost, and
253
+ // breaks ties based on value number.
254
+ best[ value] = std:: cmp:: min ( best[ x] , best[ y] ) ;
255
+ trace ! (
256
+ " -> best of union({:?}, {:?}) = {:?}" ,
257
+ best[ x] ,
258
+ best[ y] ,
259
+ best[ value]
251
260
) ;
252
- best[ value] = BestEntry ( cost, value) ;
253
261
}
254
- }
255
- } ;
256
- debug_assert_ne ! ( best[ value] . 0 , Cost :: infinity( ) ) ;
257
- debug_assert_ne ! ( best[ value] . 1 , Value :: reserved_value( ) ) ;
258
- trace ! ( "best for eclass {:?}: {:?}" , value, best[ value] ) ;
262
+ ValueDef :: Param ( _, _) => {
263
+ best[ value] = BestEntry ( Cost :: zero ( ) , value) ;
264
+ }
265
+ // If the Inst is inserted into the layout (which is,
266
+ // at this point, only the side-effecting skeleton),
267
+ // then it must be computed and thus we give it zero
268
+ // cost.
269
+ ValueDef :: Result ( inst, _) => {
270
+ if let Some ( _) = self . func . layout . inst_block ( inst) {
271
+ best[ value] = BestEntry ( Cost :: zero ( ) , value) ;
272
+ } else {
273
+ let inst_data = & self . func . dfg . insts [ inst] ;
274
+ // N.B.: at this point we know that the opcode is
275
+ // pure, so `pure_op_cost`'s precondition is
276
+ // satisfied.
277
+ let cost = Cost :: of_pure_op (
278
+ inst_data. opcode ( ) ,
279
+ self . func . dfg . inst_values ( inst) . map ( |value| best[ value] . 0 ) ,
280
+ ) ;
281
+ best[ value] = BestEntry ( cost, value) ;
282
+ trace ! ( " -> cost of value {} = {:?}" , value, cost) ;
283
+ }
284
+ }
285
+ } ;
286
+
287
+ // Keep on iterating the fixpoint loop while we are finding new
288
+ // best values.
289
+ keep_going |= orig_best_value != best[ value] ;
290
+ }
291
+ }
292
+
293
+ if cfg ! ( any( feature = "trace-log" , debug_assertions) ) {
294
+ trace ! ( "finished fixpoint loop to compute best value for each eclass" ) ;
295
+ for value in self . func . dfg . values ( ) {
296
+ trace ! ( "-> best for eclass {:?}: {:?}" , value, best[ value] ) ;
297
+ debug_assert_ne ! ( best[ value] . 1 , Value :: reserved_value( ) ) ;
298
+ // You might additionally be expecting an assert that the best
299
+ // cost is not infinity, however infinite cost *can* happen in
300
+ // practice. First, note that our cost function doesn't know
301
+ // about any shared structure in the dataflow graph, it only
302
+ // sums operand costs. (And trying to avoid that by deduping a
303
+ // single operation's operands is a losing game because you can
304
+ // always just add one indirection and go from `add(x, x)` to
305
+ // `add(foo(x), bar(x))` to hide the shared structure.) Given
306
+ // that blindness to sharing, we can make cost grow
307
+ // exponentially with a linear sequence of operations:
308
+ //
309
+ // v0 = iconst.i32 1 ;; cost = 1
310
+ // v1 = iadd v0, v0 ;; cost = 3 + 1 + 1
311
+ // v2 = iadd v1, v1 ;; cost = 3 + 5 + 5
312
+ // v3 = iadd v2, v2 ;; cost = 3 + 13 + 13
313
+ // v4 = iadd v3, v3 ;; cost = 3 + 29 + 29
314
+ // v5 = iadd v4, v4 ;; cost = 3 + 61 + 61
315
+ // v6 = iadd v5, v5 ;; cost = 3 + 125 + 125
316
+ // ;; etc...
317
+ //
318
+ // Such a chain can cause cost to saturate to infinity. How do
319
+ // we choose which e-node is best when there are multiple that
320
+ // have saturated to infinity? It doesn't matter. As long as
321
+ // invariant (2) for optimization rules is upheld by our rule
322
+ // set (see `cranelift/codegen/src/opts/README.md`) it is safe
323
+ // to choose *any* e-node in the e-class. At worst we will
324
+ // produce suboptimal code, but never an incorrectness.
325
+ }
259
326
}
260
327
}
261
328
@@ -606,7 +673,13 @@ impl<'a> Elaborator<'a> {
606
673
}
607
674
inst
608
675
} ;
676
+
609
677
// Place the inst just before `before`.
678
+ debug_assert ! (
679
+ is_pure_for_egraph( self . func, inst) ,
680
+ "something has gone very wrong if we are elaborating effectful \
681
+ instructions, they should have remained in the skeleton"
682
+ ) ;
610
683
self . func . layout . insert_inst ( inst, before) ;
611
684
612
685
// Update the inst's arguments.
0 commit comments