Skip to content

Optimize branch patterns in peephole optimizer #290

@jserv

Description

@jserv

Description

Branch instructions often follow patterns that can be optimized for better performance and code size. This includes combining compare-and-branch, eliminating redundant branches, and optimizing branch chains.

Branch Patterns to Optimize

1. Compare and Branch Fusion

# ARM - Before
cmp r0, #0
beq label

# ARM - After  
cbz r0, label    ; Combined compare-branch-zero

# RISC-V - Before
li t0, 0
beq a0, t0, label

# RISC-V - After
beqz a0, label   ; Pseudo-instruction for branch-equal-zero

2. Branch Chain Optimization

# Before
b .L1
.L1:
b .L2

# After
b .L2            ; Direct branch to final target

3. Conditional Branch Inversion

# Before (inefficient layout)
cmp r0, #0
bne .L1
b .L2
.L1:

# After (fall-through optimization)
cmp r0, #0
beq .L2
.L1:

4. Dead Code After Unconditional Branch

# Before
b end_func
add r0, r0, #1   ; Unreachable
mov r1, #0       ; Unreachable
end_func:

# After
b end_func
end_func:

5. Redundant Conditional Branches

# Before
cmp r0, r1
beq .same
bne .different   ; Redundant - always taken if beq not taken

# After
cmp r0, r1
beq .same
; Fall through to .different
.different:

6. Compare Elimination

# Before
sub r2, r0, r1   ; Sets flags
cmp r0, r1       ; Redundant comparison

# After
sub r2, r0, r1   ; Flags already set
; Use flags from sub

Implementation

1. Branch Target Analysis

typedef struct {
    char *label;
    int offset;
    bool is_forward;
    int jump_count;  // How many jumps to reach final target
} branch_target_t;

branch_target_t *analyze_branch_target(insn_t *branch) {
    branch_target_t *target = malloc(sizeof(branch_target_t));
    target->label = branch->target_label;
    
    // Follow branch chains
    while (is_unconditional_branch(get_insn_at_label(target->label))) {
        target->label = get_insn_at_label(target->label)->target_label;
        target->jump_count++;
    }
    
    return target;
}

2. Pattern Matching for Fusion

typedef struct {
    insn_t *cmp;
    insn_t *branch;
    bool can_fuse;
} cmp_branch_pair_t;

bool can_fuse_cmp_branch(insn_t *cmp, insn_t *branch) {
    // Check if consecutive
    if (cmp->next != branch) return false;
    
    // Check if comparing with zero
    if (cmp->op == OP_CMP && is_zero_operand(cmp->rs2)) {
        // Check branch type
        switch (branch->cond) {
        case COND_EQ:  // Can use cbz
        case COND_NE:  // Can use cbnz
            return true;
        }
    }
    
    return false;
}

void apply_cmp_branch_fusion(cmp_branch_pair_t *pair) {
    // Replace with fused instruction
    if (pair->branch->cond == COND_EQ) {
        create_cbz(pair->cmp->rs1, pair->branch->target);
    } else {
        create_cbnz(pair->cmp->rs1, pair->branch->target);
    }
    
    // Mark original instructions for deletion
    mark_deleted(pair->cmp);
    mark_deleted(pair->branch);
}

3. Branch Chain Elimination

void optimize_branch_chains(func_t *func) {
    for (bb in func->bbs) {
        insn_t *last = get_last_insn(bb);
        
        if (is_branch(last)) {
            branch_target_t *target = analyze_branch_target(last);
            
            if (target->jump_count > 0) {
                // Update to direct branch
                last->target_label = target->label;
                last->offset = calculate_offset(last, target->label);
            }
        }
    }
}

4. Layout Optimization

typedef struct {
    basic_block_t *bb;
    int exec_frequency;
    bool is_loop_header;
} bb_layout_t;

void optimize_branch_layout(func_t *func) {
    // Calculate execution frequencies
    bb_layout_t *layout = analyze_frequencies(func);
    
    // Place hot blocks together
    for (int i = 0; i < func->bb_count; i++) {
        if (layout[i].exec_frequency > HOT_THRESHOLD) {
            // Try to make hot path fall-through
            if (can_invert_branch(layout[i].bb)) {
                invert_branch_condition(layout[i].bb);
                swap_successors(layout[i].bb);
            }
        }
    }
}

5. Conditional Move Generation

bool try_conditional_move(insn_t *branch) {
    // Pattern: branch around single move
    if (!is_conditional_branch(branch)) return false;
    
    basic_block_t *taken = branch->taken_target;
    basic_block_t *not_taken = branch->not_taken_target;
    
    // Check for simple move pattern
    if (count_insns(taken) == 1 && is_move(taken->first_insn)) {
        if (arch_has_conditional_move()) {
            // Replace with conditional move
            create_conditional_move(branch->cond, 
                                  taken->first_insn->rd,
                                  taken->first_insn->rs1);
            remove_branch(branch);
            return true;
        }
    }
    
    return false;
}

Test Cases

// Compare and branch fusion
void test_cbz(int x) {
    if (x == 0) {     // Should use cbz
        do_something();
    }
}

// Branch chain
void test_chain() {
    goto label1;
label1:
    goto label2;      // Should optimize to direct jump
label2:
    return;
}

// Layout optimization
int test_hot_path(int x) {
    if (x > 0) {      // Assume hot path
        return x * 2;
    } else {          // Cold path
        return complex_calculation(x);
    }
}

// Dead code elimination
int test_dead_after_branch() {
    if (condition) {
        return 1;
        x = 5;        // Dead code
    }
    return 0;
}

Architecture-Specific Optimizations

ARM

  • Utilize cbz/cbnz instructions
  • Use conditional execution (IT blocks in Thumb-2)
  • Optimize for pipeline characteristics

RISC-V

  • Use compressed branch instructions when possible
  • Optimize JAL vs JALR usage
  • Consider branch prediction hints

Performance Impact

  • 5-10% reduction in branch instructions
  • Improved branch prediction accuracy
  • Better I-cache utilization
  • Reduced pipeline stalls

Success Metrics

  • Reduction in total branch count
  • Improved fall-through rate
  • Fewer mispredicted branches
  • Smaller code size

Metadata

Metadata

Assignees

No one assigned

    Labels

    enhancementNew feature or request

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions