Description
shecc's memory usage grows rapidly with input size, reaching hundreds of megabytes for moderate programs. The arena allocators never free memory, and data structures are not optimized for space efficiency. This issue tracks efforts to reduce memory footprint while maintaining performance.
Memory Optimization Strategies
1. Arena Compaction
Implement aggressive memory reclamation between compilation phases:
// Current: Never free memory
typedef struct arena {
    char *base;
    size_t size;
    size_t used;
    struct arena *next; // Chain of arenas
} arena_t;

// Proposed: Compactable arenas with per-allocation metadata
typedef struct {
    void *start;
    size_t size;
    bool in_use;
    uint32_t phase; // Compilation phase when allocated
} allocation_t;

typedef struct {
    arena_t base;
    allocation_t *allocations;
    int alloc_count;
    uint32_t current_phase;
} compactable_arena_t;
void enter_compilation_phase(compactable_arena_t *arena, uint32_t phase) {
    arena->current_phase = phase;
    // Mark allocations from phases older than the previous one as dead.
    // Written as "phase + 1 < phase" to avoid unsigned wraparound at phase 0.
    for (int i = 0; i < arena->alloc_count; i++) {
        if (arena->allocations[i].phase + 1 < phase) {
            arena->allocations[i].in_use = false;
        }
    }
    // Compact if fragmentation > threshold
    if (get_fragmentation(arena) > 0.5) {
        compact_arena(arena);
    }
}
void compact_arena(compactable_arena_t *arena) {
    // Move live allocations to the start of the arena
    char *write_ptr = arena->base.base;
    for (int i = 0; i < arena->alloc_count; i++) {
        if (arena->allocations[i].in_use) {
            size_t size = arena->allocations[i].size;
            void *old_ptr = arena->allocations[i].start;
            if (old_ptr != write_ptr) {
                memmove(write_ptr, old_ptr, size);
                // update_pointers(): rewrite every reference to the moved
                // block; this is the hard part and is not shown here
                update_pointers(old_ptr, write_ptr);
            }
            arena->allocations[i].start = write_ptr;
            write_ptr += size;
        }
    }
    arena->base.used = write_ptr - arena->base.base;
}
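Both routines above assume a get_fragmentation() helper that is not defined anywhere yet; a minimal sketch (the exact metric is open to discussion) could report the fraction of used bytes held by dead allocations:

/* Hypothetical helper assumed by enter_compilation_phase():
 * fraction of the used region occupied by dead allocations. */
double get_fragmentation(compactable_arena_t *arena) {
    size_t dead = 0;
    for (int i = 0; i < arena->alloc_count; i++) {
        if (!arena->allocations[i].in_use)
            dead += arena->allocations[i].size;
    }
    return arena->base.used ? (double) dead / (double) arena->base.used : 0.0;
}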
2. Instruction Size Reduction
Optimize IR instruction representation:
// Current: 96 bytes per instruction
typedef struct insn {
    opcode_t op;           // 4 bytes
    var_t *rd, *rs1, *rs2; // 24 bytes (3 pointers)
    basic_block_t *bb;     // 8 bytes
    func_t *func;          // 8 bytes
    int imm;               // 4 bytes
    int line_no;           // 4 bytes
    char padding[44];      // Wasted space!
} insn_t;                  // Total: 96 bytes
// Proposed: 28 bytes per instruction
typedef struct {
    uint16_t op;           // 2 bytes
    uint16_t flags;        // 2 bytes (combines several boolean fields)
    uint32_t rd_idx;       // 4 bytes (index instead of pointer)
    uint32_t rs1_idx;      // 4 bytes
    uint32_t rs2_idx;      // 4 bytes
    union {
        int32_t imm;       // 4 bytes
        uint32_t bb_idx;   // 4 bytes (for branches)
    };
    uint32_t next_idx;     // 4 bytes (index of the next instruction)
    uint32_t line_no;      // 4 bytes
} compact_insn_t;          // Total: 28 bytes (more than 3x smaller!)

// Variable pool for index lookup
#define INVALID_IDX 0xFFFFFFFFu // sentinel meaning "no operand"

typedef struct {
    var_t *vars;
    int count;
    int capacity;
} var_pool_t;

var_t *get_var(var_pool_t *pool, uint32_t idx) {
    if (idx == INVALID_IDX) return NULL;
    return &pool->vars[idx];
}
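With this layout, instruction traversal goes through indices rather than pointers; a rough sketch of what iteration could look like (for_each_insn and insn_pool are illustrative names, not existing shecc APIs):

/* Illustrative walk over a block's instructions via a shared pool;
 * compact_insn_t, var_pool_t and INVALID_IDX are the structures above. */
void for_each_insn(compact_insn_t *insn_pool, var_pool_t *var_pool,
                   uint32_t first_idx) {
    for (uint32_t idx = first_idx; idx != INVALID_IDX;
         idx = insn_pool[idx].next_idx) {
        compact_insn_t *insn = &insn_pool[idx];
        var_t *rd = get_var(var_pool, insn->rd_idx); /* index -> var_t */
        /* ... process insn / rd here ... */
        (void) rd;
    }
}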
3. Symbol Table Compression
Use string interning and compact representation:
// Current: Duplicated strings, large structs
typedef struct symbol {
    char *name;          // 8 bytes + string memory
    type_t *type;        // 8 bytes
    int offset;          // 4 bytes
    int flags;           // 4 bytes
    struct symbol *next; // 8 bytes
    // ... more fields
} symbol_t;              // 64+ bytes

// Proposed: Interned strings, packed structure
typedef struct {
    uint32_t name_offset; // 4 bytes (offset in string pool)
    uint16_t type_idx;    // 2 bytes (index to type table)
    uint16_t flags;       // 2 bytes
    int32_t offset;       // 4 bytes
} compact_symbol_t;       // 12 bytes (5x smaller!)

typedef struct {
    char *buffer;         // Single buffer for all strings
    size_t size;
    size_t used;
    hashmap_t *str_to_offset;
} string_pool_t;
uint32_t intern_string(string_pool_t *pool, const char *str) {
    // Return the existing offset if this spelling was seen before
    uint32_t *offset = hashmap_get(pool->str_to_offset, str);
    if (offset) return *offset;

    // Append to the pool (growing pool->buffer when full is omitted here)
    uint32_t new_offset = pool->used;
    size_t len = strlen(str) + 1;
    memcpy(pool->buffer + pool->used, str, len);
    pool->used += len;

    // Key the map on the pooled copy so it never points at caller memory
    hashmap_put(pool->str_to_offset, pool->buffer + new_offset, new_offset);
    return new_offset;
}
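Usage sketch: with interning in place, a symbol carries only an offset, name equality becomes an integer compare, and the string is materialized only when needed (symbol_name() below is illustrative):

/* Resolve a compact symbol's name from the shared pool. */
const char *symbol_name(string_pool_t *pool, compact_symbol_t *sym) {
    return pool->buffer + sym->name_offset;
}

/* Interning the same spelling twice yields the same offset, so the
 * string "counter" is stored exactly once and a == b holds. */
void intern_example(string_pool_t *pool) {
    uint32_t a = intern_string(pool, "counter");
    uint32_t b = intern_string(pool, "counter");
    (void) a;
    (void) b;
}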
4. Basic Block Optimization
Reduce basic block memory overhead:
// Current: Linked lists with high overhead
typedef struct basic_block {
    insn_t *insns;       // Linked list of instructions
    int insn_count;
    struct basic_block **predecessors;
    struct basic_block **successors;
    int pred_count, succ_count;
    // ... many fields
} basic_block_t;         // 128+ bytes

// Proposed: Compact representation
typedef struct {
    uint32_t first_insn; // Index to first instruction
    uint16_t insn_count; // Number of instructions
    uint16_t flags;      // Various boolean flags
    uint32_t preds;      // Bitmap for small graphs
    uint32_t succs;      // or index to edge list
} compact_bb_t;          // 16 bytes (8x smaller!)
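How preds/succs are encoded is still open; one possible scheme (purely illustrative, including the BB_PRED_LIST flag and edge_pool array) uses the bitmap while a block has few predecessors and falls back to a shared edge list otherwise:

/* Illustrative decoding of the preds field: bitmap over the function's
 * first 32 basic blocks, or an index into a shared edge array of the
 * form [count, idx0, idx1, ...]. None of these names exist in shecc. */
#define BB_PRED_LIST 0x0001 /* hypothetical flag bit */

void for_each_pred(const compact_bb_t *bb, const uint32_t *edge_pool,
                   void (*visit)(uint32_t bb_idx)) {
    if (bb->flags & BB_PRED_LIST) {
        uint32_t count = edge_pool[bb->preds];
        for (uint32_t i = 0; i < count; i++)
            visit(edge_pool[bb->preds + 1 + i]);
    } else {
        for (uint32_t i = 0; i < 32; i++)
            if (bb->preds & (1u << i))
                visit(i);
    }
}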
5. On-Demand Loading
Load and process functions incrementally:
// Proposed: Lazy function processing
typedef struct {
    char *name;
    size_t source_offset; // Offset in source file
    size_t source_length;
    bool is_parsed;
    bool is_compiled;
    void *compiled_code;
} lazy_func_t;
void compile_function_on_demand(lazy_func_t *func) {
    if (func->is_compiled) return;
    if (!func->is_parsed) {
        // Parse just this function
        parse_function_at(func->source_offset, func->source_length);
        func->is_parsed = true;
    }
    // Compile
    func->compiled_code = compile_single_function(func);
    func->is_compiled = true;
    // Free the parse tree if it is no longer needed
    // (keep_parse_trees: a global flag, e.g. for passes that revisit the AST)
    if (!keep_parse_trees) {
        free_function_parse_tree(func);
    }
}
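A driver could then index function boundaries in one cheap scan and only pay for the functions that are actually referenced; a sketch (is_referenced() and the reachability test are assumptions, not existing code):

/* Hypothetical top-level loop: every function gets an index entry,
 * but only referenced functions are parsed and compiled. */
void compile_translation_unit(lazy_func_t *funcs, int func_count) {
    for (int i = 0; i < func_count; i++) {
        if (is_referenced(funcs[i].name)) /* e.g. reachable from main() */
            compile_function_on_demand(&funcs[i]);
    }
}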
6. Memory Pooling by Type
Segregate allocations by lifetime:
typedef enum {
    POOL_PERMANENT, // Symbol tables, types
    POOL_FUNCTION,  // Per-function data
    POOL_TEMPORARY, // Temporary during optimization
    POOL_CODEGEN    // Code generation buffers
} pool_type_t;

typedef struct {
    arena_t pools[4];
    pool_type_t active_pool;
} memory_manager_t;

void *mm_alloc(memory_manager_t *mm, size_t size, pool_type_t type) {
    return arena_alloc(&mm->pools[type], size);
}

void mm_free_pool(memory_manager_t *mm, pool_type_t type) {
    if (type != POOL_PERMANENT) {
        arena_reset(&mm->pools[type]);
    }
}
// Usage during compilation (mm and set_active_pool() are assumed globals/helpers)
void compile_function(func_t *func) {
    set_active_pool(POOL_FUNCTION);
    parse_function(func);

    set_active_pool(POOL_TEMPORARY);
    optimize_function(func);
    mm_free_pool(mm, POOL_TEMPORARY);

    set_active_pool(POOL_CODEGEN);
    generate_code(func);

    // Keep only the generated code
    mm_free_pool(mm, POOL_FUNCTION);
}
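mm_free_pool() above depends on an arena_reset() that the current allocator does not appear to provide; a minimal version (assuming a bump allocator as in arena_t) just rewinds the pointer, with chained chunks released to actually return memory:

/* Minimal reset for a bump arena: discard everything allocated so far.
 * Releasing the chained chunks (arena->next) is where real memory goes
 * back to the OS; that base/next were malloc'd is an assumption here. */
void arena_reset(arena_t *arena) {
    arena->used = 0;
    for (arena_t *a = arena->next; a; ) {
        arena_t *next = a->next;
        free(a->base);
        free(a);
        a = next;
    }
    arena->next = NULL;
}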
Memory Benchmarks
Test Programs
// tests/memory/
// Wide program (many symbols)
// 10,000 global variables
int g0, g1, g2, ..., g9999;
// Deep program (complex CFG)
// 1,000 nested if statements
if (a) { if (b) { if (c) { ... } } }
// Long program (many instructions)
// 100,000 simple statements
x = 1; y = 2; z = 3; ...
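These inputs are tedious to write by hand; a tiny generator (illustrative, file names hypothetical) can emit them, e.g. the wide case:

/* gen_wide.c - emit tests/memory/wide.c with 10,000 global ints. */
#include <stdio.h>

int main(void) {
    const int n = 10000;
    FILE *out = fopen("tests/memory/wide.c", "w");
    if (!out)
        return 1;
    fprintf(out, "int ");
    for (int i = 0; i < n; i++)
        fprintf(out, "g%d%s", i, i + 1 < n ? ", " : ";\n");
    fprintf(out, "int main() { return g0; }\n");
    fclose(out);
    return 0;
}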
Measurement Tools
Memory Profiler
// Add to src/debug.c
typedef struct {
    size_t current_usage;
    size_t peak_usage;
    size_t allocations;
    size_t deallocations;
    size_t bytes_allocated;
    size_t bytes_freed;
} memory_stats_t;

void print_memory_report(memory_stats_t *stats) {
    printf("\n=== Memory Report ===\n");
    printf("Peak Usage: %.2f MB\n", stats->peak_usage / 1048576.0);
    printf("Current Usage: %.2f MB\n", stats->current_usage / 1048576.0);
    printf("Allocations: %zu\n", stats->allocations);
    printf("Deallocations: %zu\n", stats->deallocations);
    printf("Leaked: %.2f MB\n",
           (stats->bytes_allocated - stats->bytes_freed) / 1048576.0);
    // Per-arena breakdown
    print_arena_stats();
}
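The stats need to be fed from the allocation sites; one low-intrusion option (a sketch, profiled_arena_alloc is not an existing function) wraps the arena allocator:

/* Hypothetical wrapper around arena_alloc() that keeps the counters in
 * memory_stats_t up to date, so call sites need minimal changes. */
static memory_stats_t g_mem_stats;

void *profiled_arena_alloc(arena_t *arena, size_t size) {
    void *ptr = arena_alloc(arena, size);
    g_mem_stats.allocations++;
    g_mem_stats.bytes_allocated += size;
    g_mem_stats.current_usage += size;
    if (g_mem_stats.current_usage > g_mem_stats.peak_usage)
        g_mem_stats.peak_usage = g_mem_stats.current_usage;
    return ptr;
}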
Valgrind Integration
# Add to Makefile
memcheck: out/shecc
	valgrind --leak-check=full --show-leak-kinds=all \
	         --track-origins=yes --verbose \
	         ./out/shecc -o /dev/null tests/large.c

massif: out/shecc
	valgrind --tool=massif --pages-as-heap=yes \
	         --massif-out-file=massif.out \
	         ./out/shecc -o /dev/null tests/large.c
	ms_print massif.out
Testing Strategy
- Memory regression tests in CI
- Valgrind in test suite
- Large input tests (100K+ lines)
- Memory limit tests (compile with ulimit)
Success Criteria
- Meet memory targets for all benchmarks
- No memory leaks (Valgrind clean)
- Bootstrap with 50% less memory
- Compile 100K line programs in < 256MB
- No performance regression > 5%