Skip to content

Commit 42348a5

Browse files
feat: AVX512 diff algorithm (#131)
* AVX diff
* Add avx CI run
* Change avx512_diff build flag to --enable-asm runtime flag
* Do not log unsupported windows OS for avx build

---------

Co-authored-by: Dmitriy Kovalenko <dmtr.kovalenko@outlook.com>
1 parent b019c7f commit 42348a5

File tree

7 files changed

+452
-10
lines changed

7 files changed

+452
-10
lines changed

.github/workflows/build.yml

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ jobs:
8686
path: binaries/odiff-macos-arm64
8787
retention-days: 14
8888

89+
90+
8991
test:
9092
name: Run Tests (all targets)
9193
runs-on: ${{ matrix.os }}
@@ -139,6 +141,43 @@ jobs:
139141
run: |
140142
echo "Skipping: no hosted runner available for ${{ matrix.target }}"
141143
144+
test-avx:
145+
name: Run Tests (AVX x86_64)
146+
runs-on: ${{ matrix.os }}
147+
strategy:
148+
matrix:
149+
include:
150+
- target: x86_64-linux-gnu
151+
os: ubuntu-latest
152+
- target: x86_64-macos
153+
os: macos-latest
154+
defaults:
155+
run:
156+
shell: bash
157+
steps:
158+
- name: Checkout code
159+
uses: actions/checkout@v4
160+
161+
- name: Setup Zig
162+
uses: mlugg/setup-zig@v2
163+
with:
164+
version: 0.15.1
165+
166+
- name: Install nasm for x86 targets (Linux)
167+
if: contains(matrix.target, 'x86_64') && runner.os == 'Linux'
168+
run: |
169+
sudo apt-get update
170+
sudo apt-get install -y nasm
171+
172+
- name: Install nasm for x86 targets (macOS)
173+
if: contains(matrix.target, 'x86_64') && runner.os == 'macOS'
174+
run: |
175+
brew install nasm
176+
177+
- name: Run AVX tests
178+
run: |
179+
zig build test-all --summary all
180+
142181
e2e-tests:
143182
name: End-to-end JavaScript tests
144183
needs: [build]

build.zig

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,11 @@ pub fn build(b: *std.Build) !void {
1010
const is_cross_compiling = target.result.cpu.arch != native_target.result.cpu.arch or
1111
target.result.os.tag != native_target.result.os.tag;
1212

13-
const lib_mod, const exe = buildOdiff(b, target, optimize, dynamic);
13+
const build_options = b.addOptions();
14+
build_options.addOption([]const u8, "version", manifest.version);
15+
const build_options_mod = build_options.createModule();
16+
17+
const lib_mod, const exe = buildOdiff(b, target, optimize, dynamic, build_options_mod);
1418
b.installArtifact(exe);
1519

1620
const run_cmd = b.addRunArtifact(exe);
@@ -36,6 +40,7 @@ pub fn build(b: *std.Build) !void {
3640
"src/test_io_bmp.zig",
3741
"src/test_io_jpg.zig",
3842
"src/test_io_tiff.zig",
43+
"src/test_avx.zig",
3944
};
4045

4146
const integration_tests_pure_zig = [_][]const u8{
@@ -58,6 +63,7 @@ pub fn build(b: *std.Build) !void {
5863
.optimize = optimize,
5964
}),
6065
});
66+
integration_test.root_module.addImport("build_options", build_options_mod);
6167
integration_test.linkLibC();
6268
integration_test.linkLibrary(root_lib);
6369
linkDeps(b, target, optimize, false, integration_test.root_module);
@@ -74,6 +80,7 @@ pub fn build(b: *std.Build) !void {
7480
.optimize = optimize,
7581
}),
7682
});
83+
pure_test.root_module.addImport("build_options", build_options_mod);
7784

7885
const run_pure_test = b.addRunArtifact(pure_test);
7986
integration_test_steps.append(run_pure_test) catch @panic("OOM");
@@ -95,7 +102,7 @@ pub fn build(b: *std.Build) !void {
95102
const build_ci_step = b.step("ci", "Build the app for CI");
96103
for (build_targets) |target_query| {
97104
const t = b.resolveTargetQuery(target_query);
98-
_, const odiff_exe = buildOdiff(b, t, optimize, dynamic);
105+
_, const odiff_exe = buildOdiff(b, t, optimize, dynamic, build_options_mod);
99106
odiff_exe.root_module.strip = true;
100107
const odiff_output = b.addInstallArtifact(odiff_exe, .{
101108
.dest_dir = .{
@@ -113,6 +120,7 @@ fn buildOdiff(
113120
target: std.Build.ResolvedTarget,
114121
optimize: std.builtin.OptimizeMode,
115122
dynamic: bool,
123+
build_options_mod: *std.Build.Module,
116124
) struct { *std.Build.Module, *std.Build.Step.Compile } {
117125
const lib_mod = b.createModule(.{
118126
.root_source_file = b.path("src/root.zig"),
@@ -146,11 +154,24 @@ fn buildOdiff(
146154
});
147155

148156
exe_mod.addImport("odiff_lib", lib_mod);
149-
150-
const options = b.addOptions();
151-
options.addOption([]const u8, "version", manifest.version);
152-
exe_mod.addImport("build_options", options.createModule());
153-
lib_mod.addImport("build_options", options.createModule());
157+
exe_mod.addImport("build_options", build_options_mod);
158+
lib_mod.addImport("build_options", build_options_mod);
159+
160+
if (target.result.cpu.arch == .x86_64) {
161+
const os_tag = target.result.os.tag;
162+
const fmt: ?[]const u8 = switch (os_tag) {
163+
.linux => "elf64",
164+
.macos => "macho64",
165+
else => null,
166+
};
167+
168+
if (fmt) |nasm_fmt| {
169+
const nasm = b.addSystemCommand(&.{ "nasm", "-f", nasm_fmt, "-o" });
170+
const asm_obj = nasm.addOutputFileArg("vxdiff.o");
171+
nasm.addFileArg(b.path("src/vxdiff.asm"));
172+
lib_mod.addObjectFile(asm_obj);
173+
}
174+
}
154175

155176
const exe = b.addExecutable(.{
156177
.name = "odiff",

src/cli.zig

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ pub const CliArgs = struct {
1616
antialiasing: bool = false,
1717
diff_lines: bool = false,
1818
reduce_ram_usage: bool = false,
19+
enable_asm: bool = false,
1920
ignore_regions: std.array_list.Managed(diff.IgnoreRegion),
2021
allocator: std.mem.Allocator,
2122

@@ -50,6 +51,7 @@ fn printUsage(program_name: []const u8) void {
5051
print(" --aa, --antialiasing Ignore antialiased pixels in diff\n", .{});
5152
print(" --output-diff-lines Output line numbers with differences\n", .{});
5253
print(" --reduce-ram-usage Use less memory (slower)\n", .{});
54+
print(" --enable-asm Enable AVX-512 optimized asm path when supported (x86_64 only)\n", .{});
5355
print(" -i, --ignore <regions> Ignore regions (format: x1:y1-x2:y2,x3:y3-x4:y4)\n", .{});
5456
print(" -h, --help Show this help message\n", .{});
5557
print(" --version Show version\n", .{});
@@ -158,6 +160,8 @@ pub fn parseArgs(allocator: std.mem.Allocator) !CliArgs {
158160
parsed_args.diff_lines = true;
159161
} else if (std.mem.eql(u8, arg, "--reduce-ram-usage")) {
160162
parsed_args.reduce_ram_usage = true;
163+
} else if (std.mem.eql(u8, arg, "--enable-asm")) {
164+
parsed_args.enable_asm = true;
161165
} else if (std.mem.eql(u8, arg, "-i") or std.mem.eql(u8, arg, "--ignore")) {
162166
i += 1;
163167
if (i >= args.len) {

src/diff.zig

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,11 @@ const antialiasing = @import("antialiasing.zig");
88
const Image = image_io.Image;
99
const ArrayList = std.ArrayList;
1010

11-
const HAS_AVX512 = std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f);
11+
const HAS_AVX512f = std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f);
12+
const HAS_AVX512bwvl =
13+
HAS_AVX512f and
14+
std.Target.x86.featureSetHas(builtin.cpu.features, .avx512bw) and
15+
std.Target.x86.featureSetHas(builtin.cpu.features, .avx512vl);
1216
const HAS_NEON = std.Target.aarch64.featureSetHas(builtin.cpu.features, .neon);
1317

1418
const RED_PIXEL: u32 = 0xFF0000FF;
@@ -73,6 +77,7 @@ pub const DiffOptions = struct {
7377
ignore_regions: ?[]const IgnoreRegion = null,
7478
capture_diff: bool = true,
7579
fail_on_layout_change: bool = true,
80+
enable_asm: bool = false,
7681
};
7782

7883
fn unrollIgnoreRegions(width: u32, regions: ?[]const IgnoreRegion, allocator: std.mem.Allocator) !?[]struct { u32, u32 } {
@@ -133,7 +138,14 @@ pub noinline fn compare(
133138

134139
const layout_difference = base.width != comp.width or base.height != comp.height;
135140

136-
if (layout_difference) {
141+
// AVX diff only supports default options
142+
const threshold_ok = @abs(options.threshold - 0.1) < 0.0000001;
143+
const no_ignore_regions = options.ignore_regions == null or options.ignore_regions.?.len == 0;
144+
const avx_compatible = !options.antialiasing and no_ignore_regions and !options.capture_diff and !options.diff_lines and threshold_ok;
145+
146+
if (options.enable_asm and HAS_AVX512bwvl and avx_compatible) {
147+
try compareAVX(base, comp, &diff_count);
148+
} else if (layout_difference) {
137149
// slow path for different layout or weird widths
138150
try compareDifferentLayouts(base, comp, &diff_output, &diff_count, if (diff_lines != null) &diff_lines.? else null, ignore_regions, max_delta_i64, options);
139151
} else {
@@ -217,7 +229,7 @@ pub noinline fn compareSameLayouts(base: *const Image, comp: *const Image, diff_
217229
const base_data = base.data;
218230
const comp_data = comp.data;
219231

220-
const SIMD_SIZE = std.simd.suggestVectorLength(u32) orelse if (HAS_AVX512) 16 else if (HAS_NEON) 8 else 4;
232+
const SIMD_SIZE = std.simd.suggestVectorLength(u32) orelse if (HAS_AVX512f) 16 else if (HAS_NEON) 8 else 4;
221233
const simd_end = (size / SIMD_SIZE) * SIMD_SIZE;
222234

223235
var offset: usize = 0;
@@ -328,6 +340,29 @@ pub fn compareDifferentLayouts(base: *const Image, comp: *const Image, maybe_dif
328340
}
329341
}
330342

343+
pub fn compareAVX(base: *const Image, comp: *const Image, diff_count: *u32) !void {
344+
if (!HAS_AVX512bwvl) return error.Invalid;
345+
346+
const base_ptr: [*]const u8 = @ptrCast(@alignCast(base.data.ptr));
347+
const comp_ptr: [*]const u8 = @ptrCast(@alignCast(comp.data.ptr));
348+
349+
const base_w: usize = base.width;
350+
const base_h: usize = base.height;
351+
const comp_w: usize = comp.width;
352+
const comp_h: usize = comp.height;
353+
354+
diff_count.* = vxdiff(base_ptr, comp_ptr, base_w, comp_w, base_h, comp_h);
355+
}
356+
357+
extern fn vxdiff(
358+
base_rgba: [*]const u8,
359+
comp_rgba: [*]const u8,
360+
base_width: usize,
361+
comp_width: usize,
362+
base_height: usize,
363+
comp_height: usize,
364+
) u32;
365+
331366
pub fn diff(
332367
base: *const Image,
333368
comp: *const Image,

src/main.zig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ pub fn main() !void {
7878
.diff_lines = args.diff_lines,
7979
.ignore_regions = args.ignore_regions.items,
8080
.capture_diff = args.diff_output != null,
81+
.enable_asm = args.enable_asm,
8182
};
8283

8384
const result = diff.diff(&base_img, &comp_img, diff_options, allocator) catch |err| {

src/test_avx.zig

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
const std = @import("std");
2+
const testing = std.testing;
3+
const expect = testing.expect;
4+
const expectEqual = testing.expectEqual;
5+
const expectApproxEqRel = testing.expectApproxEqRel;
6+
7+
const odiff = @import("root.zig");
8+
const image_io = odiff.image_io;
9+
const diff = odiff.diff;
10+
const color_delta = odiff.color_delta;
11+
12+
fn loadTestImage(path: []const u8, allocator: std.mem.Allocator) !image_io.Image {
13+
return image_io.loadImage(path, allocator) catch |err| {
14+
std.debug.print("Failed to load image: {s}\nError: {}\n", .{ path, err });
15+
return err;
16+
};
17+
}
18+
19+
test "layoutDifference: diff images with different layouts without capture" {
20+
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
21+
defer _ = gpa.deinit();
22+
const allocator = gpa.allocator();
23+
24+
var img1 = try loadTestImage("test/png/white4x4.png", allocator);
25+
defer img1.deinit();
26+
27+
var img2 = try loadTestImage("test/png/purple8x8.png", allocator);
28+
defer img2.deinit();
29+
30+
const options = diff.DiffOptions{
31+
.antialiasing = false,
32+
.output_diff_mask = false,
33+
.capture_diff = false,
34+
.enable_asm = true,
35+
};
36+
37+
var diff_output, const diff_count, const diff_percentage, var diff_lines = try diff.compare(&img1, &img2, options, allocator);
38+
defer if (diff_output) |*img| img.deinit();
39+
defer if (diff_lines) |*lines| lines.deinit();
40+
41+
try expectEqual(@as(u32, 16), diff_count); // diffPixels
42+
try expectApproxEqRel(@as(f64, 100.0), diff_percentage, 0.001); // diffPercentage
43+
}
44+
45+
test "PNG: finds difference between 2 images without capture" {
46+
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
47+
defer _ = gpa.deinit();
48+
const allocator = gpa.allocator();
49+
50+
var img1 = try loadTestImage("test/png/orange.png", allocator);
51+
defer img1.deinit();
52+
53+
var img2 = try loadTestImage("test/png/orange_changed.png", allocator);
54+
defer img2.deinit();
55+
56+
const options = diff.DiffOptions{
57+
.capture_diff = false,
58+
.enable_asm = true,
59+
};
60+
var diff_output, const diff_count, const diff_percentage, var diff_lines = try diff.compare(&img1, &img2, options, allocator);
61+
defer if (diff_output) |*img| img.deinit();
62+
defer if (diff_lines) |*lines| lines.deinit();
63+
64+
try expectEqual(@as(u32, 1366), diff_count); // diffPixels
65+
try expectApproxEqRel(@as(f64, 1.14), diff_percentage, 0.1); // diffPercentage
66+
}

0 commit comments

Comments
 (0)