Open
Description
Zig Version
0.11.0-dev.753+331861161
Steps to Reproduce and Observed Behavior
I thought this was covered by #12215, but now that it is fixed I still get terrible performance.
Here is a simple benchmark:
const std = @import("std");
// Total number of blocks in one chunk: 32^3 = 32768 (fits in u16 indexing below).
const len = 32*32*32;
/// Identity mapping on the index.
/// NOTE(review): presumably exists so the index is not comptime-known,
/// which is what exposes the slow array access in the caller — do not
/// simplify it away, or the reproduction disappears.
fn getIndex(i: u16) u16 {
return i;
}
/// A chunk holding `len` (32768) u16 block values — large enough that an
/// accidental per-access copy of the array is very expensive.
pub const Chunk = struct {
// Left undefined; main() initializes every element before use.
blocks: [len]u16 = undefined,
};
/// Sums all block values of `chunk`. Marked `noinline` so the benchmark
/// measures this function's code as emitted, not a folded-in version.
pub noinline fn regenerateMainMesh(chunk: *Chunk) u32 {
var sum: u32 = 0;
var i: u16 = 0;
while(i < len) : (i += 1) {
// This is the repro line: per the report, indexing `chunk.blocks`
// directly emits a memcpy of the whole array on each access; indexing
// through `(&chunk.blocks)[...]` avoids it. Must stay as-is to reproduce.
sum += chunk.blocks[getIndex(i)]; // ← workaround: (&chunk.blocks)[...]
}
return sum;
}
/// Benchmark driver: fills a chunk with its own indices, times one call to
/// regenerateMainMesh, and logs elapsed nanoseconds plus the checksum.
pub fn main() void {
var chunk: Chunk = Chunk{};
// Initialize blocks[i] = i. Uses the pre-0.11 capture syntax
// (`|*block, i|`) and two-argument @intCast matching the reported
// compiler version 0.11.0-dev.753 — do not modernize for this repro.
for(chunk.blocks) |*block, i| {
block.* = @intCast(u16, i);
}
const start = std.time.nanoTimestamp();
const sum = regenerateMainMesh(&chunk);
const end = std.time.nanoTimestamp();
// Logged at err level; the sum is printed so the work cannot be elided.
std.log.err("Time: {} Sum: {}", .{end - start, sum});
}
Even in ReleaseFast mode (-OReleaseFast) the performance is terrible:
$ zig run test.zig -OReleaseFast
error: Time: 104980842 Sum: 536854528
Godbolt reveals that, like in #12215, a memcpy of the whole array is emitted when accessing the array.
Expected Behavior
When applying the workaround
- sum += chunk.blocks[getIndex(i)]; // ← workaround: (&chunk.blocks)[...]
+ sum += (&chunk.blocks)[getIndex(i)]; // ← workaround: (&chunk.blocks)[...]
the performance is significantly better:
$ zig run test.zig -OReleaseFast
error: Time: 4188 Sum: 536854528