-
Notifications
You must be signed in to change notification settings - Fork 143
Open
Labels
enhancementNew feature or requestNew feature or request
Description
Description
Partial Redundancy Elimination is a powerful optimization that eliminates computations that are redundant on some but not all paths through the program. PRE subsumes both common subexpression elimination and loop-invariant code motion, making it one of the most effective optimizations.
Current State
shecc's optimizer in src/ssa.c
currently implements:
- Common Subexpression Elimination (CSE) - within basic blocks only
- Basic loop optimizations in strength reduction
- No cross-block redundancy elimination except for constants (SCCP)
PRE would unify and extend these optimizations.
What PRE Optimizes
Example 1: Partial Redundancy
// Before PRE
if (flag) {
x = a + b; // Computation on one path
}
y = a + b; // Redundant on flag=true path
// After PRE
t = a + b; // Hoist computation
if (flag) {
x = t;
}
y = t; // No recomputation needed
Example 2: Loop Hoisting (LICM)
// Before PRE
for (i = 0; i < n; i++) {
x = y * z; // Loop invariant
arr[i] = x + i;
}
// After PRE
x = y * z; // Hoisted out of loop
for (i = 0; i < n; i++) {
arr[i] = x + i;
}
Example 3: Code Sinking
// Before PRE
if (flag) {
x = expensive();
} else {
x = expensive(); // Same on both paths
}
use(x);
// After PRE
if (flag) {
// empty
} else {
// empty
}
x = expensive(); // Sunk after merge
use(x);
Algorithm Implementation
1. Data Flow Analysis Framework
typedef struct {
bitset_t *anticipated; // Expressions anticipated (used later)
bitset_t *available; // Expressions available (computed earlier)
bitset_t *postponable; // Can be postponed
bitset_t *used; // Used in this block
} pre_dataflow_t;
typedef struct {
int expr_id;
opcode_t op;
var_t *operand1;
var_t *operand2;
bool is_constant;
bool is_movable; // Can be legally moved
} expression_t;
typedef struct {
expression_t **expressions;
int num_expressions;
pre_dataflow_t *block_info;
dominance_info_t *dom_info;
} pre_context_t;
2. Expression Collection Phase
void collect_expressions(func_t *func, pre_context_t *ctx) {
int expr_id = 0;
for (bb in func->bbs) {
for (insn in bb->insns) {
if (is_pre_candidate(insn)) {
expression_t *expr = create_expression(insn);
// Check if expression already exists
int existing_id = find_expression(ctx, expr);
if (existing_id >= 0) {
insn->expr_id = existing_id;
} else {
expr->expr_id = expr_id++;
insn->expr_id = expr->expr_id;
add_expression(ctx, expr);
}
}
}
}
}
bool is_pre_candidate(insn_t *insn) {
// Pure operations without side effects
switch (insn->op) {
case OP_ADD: case OP_SUB: case OP_MUL:
case OP_DIV: case OP_MOD: case OP_AND:
case OP_OR: case OP_XOR: case OP_LSHIFT:
case OP_RSHIFT:
return !has_side_effects(insn);
default:
return false;
}
}
3. Anticipation Analysis (Backward)
void compute_anticipated(pre_context_t *ctx, func_t *func) {
bool changed = true;
// Initialize: all expressions anticipated at exit
for (bb in func->bbs) {
if (is_exit_block(bb)) {
clear_all(ctx->block_info[bb->id].anticipated);
} else {
set_all(ctx->block_info[bb->id].anticipated);
}
}
// Iterate until fixed point
while (changed) {
changed = false;
for (bb in reverse_postorder(func->bbs)) {
bitset_t *old = copy_bitset(ctx->block_info[bb->id].anticipated);
bitset_t *new = create_bitset(ctx->num_expressions);
// ANTout = ∩ ANTin(successors)
if (bb->successors_count > 0) {
set_all(new);
for (succ in bb->successors) {
intersect(new, ctx->block_info[succ->id].anticipated);
}
}
// ANTin = used ∪ (ANTout - killed)
copy(ctx->block_info[bb->id].anticipated, ctx->block_info[bb->id].used);
union(ctx->block_info[bb->id].anticipated, new);
subtract(ctx->block_info[bb->id].anticipated, get_killed(bb));
if (!equal(old, ctx->block_info[bb->id].anticipated)) {
changed = true;
}
}
}
}
4. Availability Analysis (Forward)
void compute_available(pre_context_t *ctx, func_t *func) {
bool changed = true;
// Initialize: nothing available at entry
for (bb in func->bbs) {
if (is_entry_block(bb)) {
clear_all(ctx->block_info[bb->id].available);
} else {
clear_all(ctx->block_info[bb->id].available);
}
}
while (changed) {
changed = false;
for (bb in func->bbs) {
bitset_t *old = copy_bitset(ctx->block_info[bb->id].available);
// AVAILin = ∩ AVAILout(predecessors)
if (bb->predecessors_count > 0) {
set_all(ctx->block_info[bb->id].available);
for (pred in bb->predecessors) {
intersect(ctx->block_info[bb->id].available,
get_avail_out(ctx, pred));
}
}
if (!equal(old, ctx->block_info[bb->id].available)) {
changed = true;
}
}
}
}
5. Placement Optimization
void compute_optimal_placement(pre_context_t *ctx, func_t *func) {
// Compute earliest placement
for (bb in func->bbs) {
for (expr_id in anticipated_expressions(bb)) {
if (!is_available(ctx, bb, expr_id) &&
is_anticipated(ctx, bb, expr_id)) {
mark_for_insertion(ctx, bb, expr_id, EARLIEST);
}
}
}
// Compute latest placement (postponable)
compute_postponable(ctx, func);
// Find optimal between earliest and latest
for (bb in func->bbs) {
for (expr_id in marked_for_insertion(ctx, bb)) {
if (is_postponable(ctx, bb, expr_id)) {
// Try to push down to reduce register pressure
if (should_postpone(ctx, bb, expr_id)) {
move_to_successors(ctx, bb, expr_id);
}
}
}
}
}
6. Code Transformation
void insert_computations(pre_context_t *ctx, func_t *func) {
for (bb in func->bbs) {
insn_t *insert_point = get_insert_point(bb);
for (expr_id in ctx->insertions[bb->id]) {
expression_t *expr = ctx->expressions[expr_id];
// Create new temporary
var_t *temp = create_temp_var();
ctx->expr_temps[expr_id] = temp;
// Insert computation
insn_t *new_insn = create_computation(expr, temp);
insert_before(insert_point, new_insn);
}
}
}
void replace_redundant(pre_context_t *ctx, func_t *func) {
for (bb in func->bbs) {
for (insn in bb->insns) {
if (has_expr_id(insn)) {
int expr_id = insn->expr_id;
if (is_redundant(ctx, bb, expr_id)) {
// Replace with temporary
var_t *temp = ctx->expr_temps[expr_id];
replace_with_move(insn, temp);
}
}
}
}
}
Integration with shecc
Location in Compilation Pipeline
Add to src/ssa.c
in the ssa_optimize()
function:
void ssa_optimize(func_t *func) {
build_ssa(func);
// NEW: Run PRE before other optimizations
partial_redundancy_elimination(func);
// Existing optimizations benefit from PRE
sccp(func); // More constants to propagate
cse(func); // Fewer expressions to check
dce(func); // More dead code from PRE temps
}
Required Infrastructure
- Dominator tree - Already exists in shecc
- Data flow framework - Need to add generic framework
- Expression hashing - Can reuse from CSE
- Temporary generation - Exists for SSA
Test Cases
// tests/test_pre.c
// Basic partial redundancy
int test_partial(int a, int b, int flag) {
int x = 0;
if (flag) {
x = a * b + 5; // Expensive computation
}
int y = a * b + 5; // Partially redundant
return x + y;
}
// Loop invariant hoisting
int test_loop_invariant(int *arr, int n, int x, int y) {
int sum = 0;
for (int i = 0; i < n; i++) {
int inv = x * y + 10; // Should be hoisted
sum += arr[i] * inv;
}
return sum;
}
// Complex control flow
int test_complex_cfg(int a, int b, int c) {
int result;
if (a > 0) {
if (b > 0) {
result = c * c + 1;
} else {
result = c * c + 2; // c*c is common
}
} else {
result = c * c + 3; // c*c on all paths
}
return result;
}
// Should not optimize (side effects)
int test_side_effects() {
int x;
if (rand() > 0) {
x = getchar() + 1;
}
int y = getchar() + 1; // Cannot eliminate - side effect
return x + y;
}
Expected Performance Impact
- 20-40% reduction in redundant computations
- 10-20% speedup in loops with invariant code
- 5-10% reduction in register pressure
- Compilation time: 5-10% increase (acceptable)
Comparison with Current Optimizations
Feature | Current CSE | Current LICM | PRE |
---|---|---|---|
Scope | Basic block | None | Global |
Partial redundancy | No | No | Yes |
Loop invariants | No | No | Yes |
Code sinking | No | No | Yes |
Optimal placement | No | No | Yes |
References
- Morel & Renvoise (1979) - Original PRE algorithm
- Knoop, Rüthing & Steffen (1992) - Lazy code motion
- Briggs & Cooper (1994) - Practical improvements
Success Criteria
- Eliminates all full redundancies (like CSE)
- Hoists all loop invariants
- No incorrect transformations
- Bootstrap remains successful
- 20%+ reduction in redundant computations in benchmarks
Metadata
Metadata
Assignees
Labels
enhancementNew feature or requestNew feature or request