Skip to content

Reduce memory usage through smart allocation and data structure optimization #297

@jserv

Description

shecc's memory usage grows rapidly with input size, reaching hundreds of megabytes for moderate programs. The arena allocators never free memory, and data structures are not optimized for space efficiency. This issue tracks efforts to reduce memory footprint while maintaining performance.

Memory Optimization Strategies

1. Arena Compaction

Implement aggressive memory reclamation between compilation phases:

// Current: Never free memory — a bump allocator whose arenas are chained
// together and only ever grow; nothing is reclaimed until process exit.
typedef struct arena {
    char *base;          // backing buffer for this arena
    size_t size;         // buffer capacity in bytes
    size_t used;         // bump offset: bytes handed out so far
    struct arena *next;  // Chain of arenas
} arena_t;

// Proposed: Compactable arenas with metadata — one record per allocation
// so dead blocks can be identified and live ones slid together.
typedef struct {
    void *start;     // current address of the block inside the arena
    size_t size;     // block size in bytes
    bool in_use;     // false once the owning phase has been retired
    uint32_t phase;  // Compilation phase when allocated
} allocation_t;

// An arena plus per-allocation bookkeeping, enabling phase-based
// reclamation and compaction (see enter_compilation_phase / compact_arena).
typedef struct {
    arena_t base;               // underlying bump arena
    allocation_t *allocations;  // one entry per tracked allocation
    int alloc_count;            // number of entries in `allocations`
    uint32_t current_phase;     // phase tag applied to new allocations
} compactable_arena_t;

/*
 * Advance the arena into a new compilation phase.
 *
 * Allocations tagged with a phase older than the immediately preceding
 * one are retired (marked not-in-use); the arena is then compacted when
 * fragmentation exceeds 50%.
 *
 * NOTE(review): assumes phases are entered in increasing order — confirm.
 */
void enter_compilation_phase(compactable_arena_t *arena, uint32_t phase) {
    arena->current_phase = phase;

    /* Free allocations from phases older than (phase - 1).
     * Guard against unsigned wrap-around: the original computed
     * `phase - 1` unconditionally, so phase == 0 underflowed to
     * UINT32_MAX and retired every allocation. */
    if (phase >= 1) {
        for (int i = 0; i < arena->alloc_count; i++) {
            if (arena->allocations[i].phase < phase - 1) {
                arena->allocations[i].in_use = false;
            }
        }
    }

    // Compact if fragmentation > threshold
    if (get_fragmentation(arena) > 0.5) {
        compact_arena(arena);
    }
}

/*
 * Slide every live allocation down to the front of the arena, fixing up
 * external references via update_pointers(), then shrink the used count.
 * Dead (in_use == false) entries are simply skipped over.
 */
void compact_arena(compactable_arena_t *arena) {
    char *dst = arena->base.base;

    for (int i = 0; i < arena->alloc_count; i++) {
        allocation_t *a = &arena->allocations[i];
        if (!a->in_use)
            continue;

        if (a->start != dst) {
            /* Source and destination may overlap when sliding left,
             * hence memmove rather than memcpy. */
            memmove(dst, a->start, a->size);
            update_pointers(a->start, dst);
        }

        a->start = dst;
        dst += a->size;
    }

    arena->base.used = (size_t)(dst - arena->base.base);
}

2. Instruction Size Reduction

Optimize IR instruction representation:

// Current: 96 bytes per instruction (field-size comments assume LP64)
typedef struct insn {
    opcode_t op;           // 4 bytes
    var_t *rd, *rs1, *rs2; // 24 bytes (3 pointers)
    basic_block_t *bb;     // 8 bytes: owning basic block
    func_t *func;          // 8 bytes: owning function
    int imm;               // 4 bytes: immediate operand
    int line_no;           // 4 bytes: source line for diagnostics
    char padding[44];      // Wasted space!
} insn_t;  // Total: 96 bytes

// Proposed: compact instruction. NOTE(review): the fields below sum to
// 28 bytes (2+2+4*6) with natural 4-byte alignment and no padding, not
// the 32 claimed elsewhere in this issue — still >3x smaller than 96.
typedef struct {
    uint16_t op;           // 2 bytes: opcode
    uint16_t flags;        // 2 bytes (combine multiple fields)
    uint32_t rd_idx;       // 4 bytes (index instead of pointer)
    uint32_t rs1_idx;      // 4 bytes: source operand 1 index
    uint32_t rs2_idx;      // 4 bytes: source operand 2 index
    union {
        int32_t imm;       // 4 bytes: immediate for ALU ops
        uint32_t bb_idx;   // 4 bytes (for branches)
    };
    uint32_t next_idx;     // 4 bytes (index to next instruction)
    uint32_t line_no;      // 4 bytes: source line for diagnostics
} compact_insn_t;  // Total: 28 bytes — over 3x smaller than insn_t

// Variable pool for index lookup — instructions store 32-bit indices into
// this array instead of 8-byte var_t pointers.
typedef struct {
    var_t *vars;   // contiguous array of variables
    int count;     // live entries
    int capacity;  // allocated entries
} var_pool_t;

/*
 * Resolve a variable index from a compact instruction back to a var_t.
 *
 * Returns NULL for INVALID_IDX and for out-of-range indices; the range
 * guard prevents an out-of-bounds read that the original allowed when
 * handed a corrupted or stale index.
 */
var_t *get_var(var_pool_t *pool, uint32_t idx) {
    if (idx == INVALID_IDX) return NULL;
    if (pool->count < 0 || idx >= (uint32_t)pool->count)
        return NULL;  /* defensive: index past the live pool */
    return &pool->vars[idx];
}

3. Symbol Table Compression

Use string interning and compact representation:

// Current: Duplicated strings, large structs — each symbol owns its own
// heap copy of the name and chains through an intrusive list.
typedef struct symbol {
    char *name;           // 8 bytes + string memory
    type_t *type;         // 8 bytes
    int offset;           // 4 bytes: frame/storage offset
    int flags;            // 4 bytes: storage class, linkage, etc.
    struct symbol *next;  // 8 bytes: scope/bucket chain link
    // ... more fields
} symbol_t;  // 64+ bytes

// Proposed: Interned strings, packed structure. Names live once in a
// shared string pool; types are referenced by table index.
typedef struct {
    uint32_t name_offset;  // 4 bytes (offset in string pool)
    uint16_t type_idx;     // 2 bytes (index to type table; caps types at 65536)
    uint16_t flags;        // 2 bytes
    int32_t offset;        // 4 bytes (signed: stack offsets can be negative)
} compact_symbol_t;  // 12 bytes (5x smaller!)

// Interning pool: every distinct string is stored once in `buffer`,
// and symbols refer to it by byte offset.
typedef struct {
    char *buffer;          // Single buffer for all strings
    size_t size;           // buffer capacity in bytes
    size_t used;           // bytes consumed (next free offset)
    hashmap_t *str_to_offset;  // string -> offset map for O(1) dedup
} string_pool_t;

/*
 * Return the string-pool offset of `str`, interning it on first use.
 *
 * Already-interned strings are resolved through the hashmap. New strings
 * are appended (including their NUL terminator) to the pool buffer,
 * growing the buffer as needed — the original memcpy'd with no capacity
 * check and overflowed once the pool filled up.
 *
 * Returns UINT32_MAX if the buffer cannot be grown.
 *
 * NOTE(review): the map is keyed on the caller's `str` pointer, as in the
 * original sketch — confirm hashmap_put copies the key, or that callers
 * guarantee the key's lifetime.
 */
uint32_t intern_string(string_pool_t *pool, const char *str) {
    // Check if already interned
    uint32_t *offset = hashmap_get(pool->str_to_offset, str);
    if (offset) return *offset;

    size_t len = strlen(str) + 1;

    // Grow the backing buffer if the new string does not fit.
    if (pool->used + len > pool->size) {
        size_t new_size = pool->size ? pool->size * 2 : 4096;
        while (new_size < pool->used + len)
            new_size *= 2;
        char *new_buf = realloc(pool->buffer, new_size);
        if (!new_buf)
            return UINT32_MAX;  /* out of memory; caller must check */
        pool->buffer = new_buf;
        pool->size = new_size;
    }

    // Add to pool
    uint32_t new_offset = (uint32_t)pool->used;
    memcpy(pool->buffer + pool->used, str, len);
    pool->used += len;

    hashmap_put(pool->str_to_offset, str, new_offset);
    return new_offset;
}

4. Basic Block Optimization

Reduce basic block memory overhead:

// Current: Linked lists with high overhead — predecessor/successor arrays
// are separately heap-allocated per block.
typedef struct basic_block {
    insn_t *insns;         // Linked list of instructions
    int insn_count;
    struct basic_block **predecessors;  // heap array, pred_count entries
    struct basic_block **successors;    // heap array, succ_count entries
    int pred_count, succ_count;
    // ... many fields
} basic_block_t;  // 128+ bytes

// Proposed: Compact representation — instructions addressed by index into
// the instruction pool rather than by pointer.
typedef struct {
    uint32_t first_insn;   // Index to first instruction
    uint16_t insn_count;   // Number of instructions (caps a block at 65535)
    uint16_t flags;        // Various boolean flags
    uint32_t preds;        // Bitmap for small graphs (<= 32 blocks)
    uint32_t succs;        // or index to edge list for larger graphs
} compact_bb_t;  // 16 bytes (8x smaller!)

5. On-Demand Loading

Load and process functions incrementally:

// Proposed: Lazy function processing — record where each function lives
// in the source so it can be parsed/compiled only when first needed.
typedef struct {
    char *name;
    size_t source_offset;  // Offset in source file
    size_t source_length;  // byte length of the function's source text
    bool is_parsed;        // parse tree has been built
    bool is_compiled;      // compiled_code is valid
    void *compiled_code;   // generated code for this function
} lazy_func_t;

/*
 * Compile `func` lazily: parse its source slice on first touch, compile
 * it once, and optionally discard the parse tree afterwards.
 *
 * The function is marked compiled only when compilation actually produced
 * code, so a failed attempt can be retried — the original set is_compiled
 * unconditionally and would cache a NULL result forever.
 */
void compile_function_on_demand(lazy_func_t *func) {
    if (!func || func->is_compiled) return;

    if (!func->is_parsed) {
        // Parse just this function's byte range of the source file
        parse_function_at(func->source_offset, func->source_length);
        func->is_parsed = true;
    }

    // Compile
    func->compiled_code = compile_single_function(func);
    func->is_compiled = (func->compiled_code != NULL);

    /* Free parse tree if not needed — only after a successful compile,
     * so a retry still has the tree available.
     * NOTE(review): keep_parse_trees is a global flag — confirm its
     * declaration site. */
    if (func->is_compiled && !keep_parse_trees) {
        free_function_parse_tree(func);
    }
}

6. Memory Pooling by Type

Segregate allocations by lifetime:

// Allocation pools segregated by object lifetime; values index
// memory_manager_t.pools, so the enumerators must stay dense from 0.
typedef enum {
    POOL_PERMANENT,    // Symbol tables, types — lives for the whole run
    POOL_FUNCTION,     // Per-function data, freed after codegen
    POOL_TEMPORARY,    // Temporary during optimization
    POOL_CODEGEN      // Code generation buffers
} pool_type_t;

// One arena per pool_type_t value; the `4` must match the enumerator count.
typedef struct {
    arena_t pools[4];         // indexed by pool_type_t
    pool_type_t active_pool;  // pool receiving default allocations
} memory_manager_t;

/*
 * Allocate `size` bytes from the pool selected by `type`.
 *
 * Returns NULL for an out-of-range pool type instead of indexing past the
 * 4-entry pools array, which the original did without any bounds check.
 */
void *mm_alloc(memory_manager_t *mm, size_t size, pool_type_t type) {
    if ((unsigned)type > POOL_CODEGEN)
        return NULL;  /* invalid pool type */
    return arena_alloc(&mm->pools[type], size);
}

/*
 * Bulk-free one pool. POOL_PERMANENT is never reset — it holds symbol
 * tables and types that must outlive every phase. Out-of-range types are
 * ignored rather than indexing past the 4-entry pools array (the original
 * had no upper-bound check).
 */
void mm_free_pool(memory_manager_t *mm, pool_type_t type) {
    if (type != POOL_PERMANENT && (unsigned)type <= POOL_CODEGEN) {
        arena_reset(&mm->pools[type]);
    }
}

// Usage during compilation: each phase allocates from its own pool and the
// short-lived pools are bulk-freed as soon as their phase ends.
// NOTE(review): `mm` and the manager behind set_active_pool are globals
// not shown in this snippet — confirm their declaration site.
void compile_function(func_t *func) {
    set_active_pool(POOL_FUNCTION);
    parse_function(func);
    
    set_active_pool(POOL_TEMPORARY);
    optimize_function(func);
    mm_free_pool(mm, POOL_TEMPORARY);  // optimizer scratch dies here
    
    set_active_pool(POOL_CODEGEN);
    generate_code(func);
    
    // Keep only generated code
    mm_free_pool(mm, POOL_FUNCTION);
}

Memory Benchmarks

Test Programs

// tests/memory/

// Wide program (many symbols)
// 10,000 global variables
int g0, g1, g2, ..., g9999;

// Deep program (complex CFG)
// 1,000 nested if statements
if (a) { if (b) { if (c) { ... } } }

// Long program (many instructions)
// 100,000 simple statements
x = 1; y = 2; z = 3; ...

Measurement Tools

Memory Profiler

// Add to src/debug.c — running totals maintained by the allocator wrappers.
typedef struct {
    size_t current_usage;    // live bytes right now
    size_t peak_usage;       // high-water mark of current_usage
    size_t allocations;      // number of allocation calls
    size_t deallocations;    // number of free calls
    size_t bytes_allocated;  // cumulative bytes requested
    size_t bytes_freed;      // cumulative bytes released
} memory_stats_t;

/*
 * Dump a human-readable summary of allocator statistics to stdout,
 * followed by a per-arena breakdown.
 *
 * The leaked figure is clamped at zero: the original computed
 * `bytes_allocated - bytes_freed` in size_t, which wraps to a huge bogus
 * value whenever bytes_freed exceeds bytes_allocated. The 1048576.0
 * magic constant is also named once instead of repeated per line.
 */
void print_memory_report(memory_stats_t *stats) {
    const double MB = 1048576.0; /* bytes per mebibyte */
    size_t leaked = stats->bytes_allocated > stats->bytes_freed
                        ? stats->bytes_allocated - stats->bytes_freed
                        : 0;

    printf("\n=== Memory Report ===\n");
    printf("Peak Usage:    %.2f MB\n", stats->peak_usage / MB);
    printf("Current Usage: %.2f MB\n", stats->current_usage / MB);
    printf("Allocations:   %zu\n", stats->allocations);
    printf("Deallocations: %zu\n", stats->deallocations);
    printf("Leaked:        %.2f MB\n", leaked / MB);

    // Per-arena breakdown
    print_arena_stats();
}

Valgrind Integration

# Add to Makefile
# memcheck: full leak report with origin tracking for uninitialized reads.
memcheck: out/shecc
    valgrind --leak-check=full --show-leak-kinds=all \
             --track-origins=yes --verbose \
             ./out/shecc -o /dev/null tests/large.c

# massif: heap profile over time; --pages-as-heap also counts mmap'd pages,
# which matters here because arena allocators may bypass malloc.
massif: out/shecc
    valgrind --tool=massif --pages-as-heap=yes \
             --massif-out-file=massif.out \
             ./out/shecc -o /dev/null tests/large.c
    ms_print massif.out

Testing Strategy

  • Memory regression tests in CI
  • Valgrind in test suite
  • Large input tests (100K+ lines)
  • Memory limit tests (compile with ulimit)

Success Criteria

  • Meet memory targets for all benchmarks
  • No memory leaks (Valgrind clean)
  • Bootstrap with 50% less memory
  • Compile 100K line programs in < 256MB
  • No performance regression > 5%

Metadata

Metadata

Assignees

No one assigned

    Labels

    enhancement — New feature or request

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions