Skip to content

Implement Partial Redundancy Elimination (PRE) optimization #288

@jserv

Description

@jserv

Description

Partial Redundancy Elimination is a powerful optimization that eliminates computations that are redundant on some but not all paths through the program. PRE subsumes both common subexpression elimination and loop-invariant code motion, making it one of the most effective optimizations.

Current State

shecc's optimizer in src/ssa.c currently implements:

  • Common Subexpression Elimination (CSE) - within basic blocks only
  • Basic loop optimizations in strength reduction
  • No cross-block redundancy elimination except for constants (SCCP)

PRE would unify and extend these optimizations.

What PRE Optimizes

Example 1: Partial Redundancy

// Before PRE
if (flag) {
    x = a + b;  // Computation on one path
}
y = a + b;      // Redundant on flag=true path

// After PRE
t = a + b;      // Hoist computation
if (flag) {
    x = t;
}
y = t;          // No recomputation needed

Example 2: Loop Hoisting (LICM)

// Before PRE
for (i = 0; i < n; i++) {
    x = y * z;  // Loop invariant
    arr[i] = x + i;
}

// After PRE
x = y * z;      // Hoisted out of loop
for (i = 0; i < n; i++) {
    arr[i] = x + i;
}

Example 3: Code Sinking

// Before PRE
if (flag) {
    x = expensive();
} else {
    x = expensive();  // Same on both paths
}
use(x);

// After PRE
if (flag) {
    // empty
} else {
    // empty
}
x = expensive();  // Sunk after merge
use(x);

Algorithm Implementation

1. Data Flow Analysis Framework

typedef struct {
    bitset_t *anticipated;  // Expressions anticipated (used later)
    bitset_t *available;    // Expressions available (computed earlier)
    bitset_t *postponable;  // Can be postponed
    bitset_t *used;        // Used in this block
} pre_dataflow_t;

typedef struct {
    int expr_id;
    opcode_t op;
    var_t *operand1;
    var_t *operand2;
    bool is_constant;
    bool is_movable;    // Can be legally moved
} expression_t;

typedef struct {
    expression_t **expressions;
    int num_expressions;
    pre_dataflow_t *block_info;
    dominance_info_t *dom_info;
} pre_context_t;

2. Expression Collection Phase

void collect_expressions(func_t *func, pre_context_t *ctx) {
    int expr_id = 0;
    
    for (bb in func->bbs) {
        for (insn in bb->insns) {
            if (is_pre_candidate(insn)) {
                expression_t *expr = create_expression(insn);
                
                // Check if expression already exists
                int existing_id = find_expression(ctx, expr);
                if (existing_id >= 0) {
                    insn->expr_id = existing_id;
                } else {
                    expr->expr_id = expr_id++;
                    insn->expr_id = expr->expr_id;
                    add_expression(ctx, expr);
                }
            }
        }
    }
}

bool is_pre_candidate(insn_t *insn) {
    // Pure operations without side effects
    switch (insn->op) {
    case OP_ADD: case OP_SUB: case OP_MUL:
    case OP_DIV: case OP_MOD: case OP_AND:
    case OP_OR: case OP_XOR: case OP_LSHIFT:
    case OP_RSHIFT:
        return !has_side_effects(insn);
    default:
        return false;
    }
}

3. Anticipation Analysis (Backward)

void compute_anticipated(pre_context_t *ctx, func_t *func) {
    bool changed = true;
    
    // Initialize: all expressions anticipated at exit
    for (bb in func->bbs) {
        if (is_exit_block(bb)) {
            clear_all(ctx->block_info[bb->id].anticipated);
        } else {
            set_all(ctx->block_info[bb->id].anticipated);
        }
    }
    
    // Iterate until fixed point
    while (changed) {
        changed = false;
        
        for (bb in reverse_postorder(func->bbs)) {
            bitset_t *old = copy_bitset(ctx->block_info[bb->id].anticipated);
            bitset_t *new = create_bitset(ctx->num_expressions);
            
            // ANTout = ∩ ANTin(successors)
            if (bb->successors_count > 0) {
                set_all(new);
                for (succ in bb->successors) {
                    intersect(new, ctx->block_info[succ->id].anticipated);
                }
            }
            
            // ANTin = used ∪ (ANTout - killed)
            copy(ctx->block_info[bb->id].anticipated, ctx->block_info[bb->id].used);
            union(ctx->block_info[bb->id].anticipated, new);
            subtract(ctx->block_info[bb->id].anticipated, get_killed(bb));
            
            if (!equal(old, ctx->block_info[bb->id].anticipated)) {
                changed = true;
            }
        }
    }
}

4. Availability Analysis (Forward)

void compute_available(pre_context_t *ctx, func_t *func) {
    bool changed = true;
    
    // Initialize: nothing available at entry
    for (bb in func->bbs) {
        if (is_entry_block(bb)) {
            clear_all(ctx->block_info[bb->id].available);
        } else {
            clear_all(ctx->block_info[bb->id].available);
        }
    }
    
    while (changed) {
        changed = false;
        
        for (bb in func->bbs) {
            bitset_t *old = copy_bitset(ctx->block_info[bb->id].available);
            
            // AVAILin = ∩ AVAILout(predecessors)
            if (bb->predecessors_count > 0) {
                set_all(ctx->block_info[bb->id].available);
                for (pred in bb->predecessors) {
                    intersect(ctx->block_info[bb->id].available,
                            get_avail_out(ctx, pred));
                }
            }
            
            if (!equal(old, ctx->block_info[bb->id].available)) {
                changed = true;
            }
        }
    }
}

5. Placement Optimization

void compute_optimal_placement(pre_context_t *ctx, func_t *func) {
    // Compute earliest placement
    for (bb in func->bbs) {
        for (expr_id in anticipated_expressions(bb)) {
            if (!is_available(ctx, bb, expr_id) &&
                is_anticipated(ctx, bb, expr_id)) {
                mark_for_insertion(ctx, bb, expr_id, EARLIEST);
            }
        }
    }
    
    // Compute latest placement (postponable)
    compute_postponable(ctx, func);
    
    // Find optimal between earliest and latest
    for (bb in func->bbs) {
        for (expr_id in marked_for_insertion(ctx, bb)) {
            if (is_postponable(ctx, bb, expr_id)) {
                // Try to push down to reduce register pressure
                if (should_postpone(ctx, bb, expr_id)) {
                    move_to_successors(ctx, bb, expr_id);
                }
            }
        }
    }
}

6. Code Transformation

void insert_computations(pre_context_t *ctx, func_t *func) {
    for (bb in func->bbs) {
        insn_t *insert_point = get_insert_point(bb);
        
        for (expr_id in ctx->insertions[bb->id]) {
            expression_t *expr = ctx->expressions[expr_id];
            
            // Create new temporary
            var_t *temp = create_temp_var();
            ctx->expr_temps[expr_id] = temp;
            
            // Insert computation
            insn_t *new_insn = create_computation(expr, temp);
            insert_before(insert_point, new_insn);
        }
    }
}

void replace_redundant(pre_context_t *ctx, func_t *func) {
    for (bb in func->bbs) {
        for (insn in bb->insns) {
            if (has_expr_id(insn)) {
                int expr_id = insn->expr_id;
                
                if (is_redundant(ctx, bb, expr_id)) {
                    // Replace with temporary
                    var_t *temp = ctx->expr_temps[expr_id];
                    replace_with_move(insn, temp);
                }
            }
        }
    }
}

Integration with shecc

Location in Compilation Pipeline

Add to src/ssa.c in the ssa_optimize() function:

void ssa_optimize(func_t *func) {
    build_ssa(func);
    
    // NEW: Run PRE before other optimizations
    partial_redundancy_elimination(func);
    
    // Existing optimizations benefit from PRE
    sccp(func);  // More constants to propagate
    cse(func);   // Fewer expressions to check
    dce(func);   // More dead code from PRE temps
}

Required Infrastructure

  1. Dominator tree - Already exists in shecc
  2. Data flow framework - Need to add generic framework
  3. Expression hashing - Can reuse from CSE
  4. Temporary generation - Exists for SSA

Test Cases

// tests/test_pre.c

// Basic partial redundancy
int test_partial(int a, int b, int flag) {
    int x = 0;
    if (flag) {
        x = a * b + 5;  // Expensive computation
    }
    int y = a * b + 5;  // Partially redundant
    return x + y;
}

// Loop invariant hoisting
int test_loop_invariant(int *arr, int n, int x, int y) {
    int sum = 0;
    for (int i = 0; i < n; i++) {
        int inv = x * y + 10;  // Should be hoisted
        sum += arr[i] * inv;
    }
    return sum;
}

// Complex control flow
int test_complex_cfg(int a, int b, int c) {
    int result;
    if (a > 0) {
        if (b > 0) {
            result = c * c + 1;
        } else {
            result = c * c + 2;  // c*c is common
        }
    } else {
        result = c * c + 3;      // c*c on all paths
    }
    return result;
}

// Should not optimize (side effects)
int test_side_effects() {
    int x;
    if (rand() > 0) {
        x = getchar() + 1;
    }
    int y = getchar() + 1;  // Cannot eliminate - side effect
    return x + y;
}

Expected Performance Impact

  • 20-40% reduction in redundant computations
  • 10-20% speedup in loops with invariant code
  • 5-10% reduction in register pressure
  • Compilation time: 5-10% increase (acceptable)

Comparison with Current Optimizations

Feature Current CSE Current LICM PRE
Scope Basic block None Global
Partial redundancy No No Yes
Loop invariants No No Yes
Code sinking No No Yes
Optimal placement No No Yes

References

  • Morel & Renvoise (1979) - Original PRE algorithm
  • Knoop, Rüthing & Steffen (1992) - Lazy code motion
  • Briggs & Cooper (1994) - Practical improvements

Success Criteria

  • Eliminates all full redundancies (like CSE)
  • Hoists all loop invariants
  • No incorrect transformations
  • Bootstrap remains successful
  • 20%+ reduction in redundant computations in benchmarks

Metadata

Metadata

Assignees

No one assigned

    Labels

    enhancementNew feature or request

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions