Skip to content

Commit 3136789

Browse files
Mike KleinSkia Commit-Bot
authored andcommitted
refactor large load/stores
This noop refactor ports the lane-immediate idea to the 64-bit loads and to 128-bit stores, and renames to simple load64, load128, store128. The store128 part of this change is _slightly_ sneaky in that we pack the argument pointer index and the lane both into int immz, but there's plenty of space there: lane needs 1 bit, and the argument pointer index needs maybe 3 or 4 max. Change-Id: I3fa01bf31312b8a69c7e287d649470ba15a8ea40 Reviewed-on: https://skia-review.googlesource.com/c/skia/+/306810 Auto-Submit: Mike Klein <mtklein@google.com> Reviewed-by: Herb Derby <herb@google.com> Commit-Queue: Mike Klein <mtklein@google.com>
1 parent 89b3c1f commit 3136789

File tree

4 files changed

+112
-143
lines changed

4 files changed

+112
-143
lines changed

src/core/SkVM.cpp

Lines changed: 51 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -263,21 +263,19 @@ namespace skvm {
263263
switch (op) {
264264
case Op::assert_true: write(o, op, V{x}, V{y}, fs(id)...); break;
265265

266-
case Op::store8: write(o, op, Arg{immy}, V{x}, fs(id)...); break;
267-
case Op::store16: write(o, op, Arg{immy}, V{x}, fs(id)...); break;
268-
case Op::store32: write(o, op, Arg{immy}, V{x}, fs(id)...); break;
269-
case Op::store64: write(o, op, Arg{immz}, V{x}, V{y}, fs(id)...); break;
270-
case Op::store128_lo: write(o, op, Arg{immz}, V{x}, V{y}, fs(id)...); break;
271-
case Op::store128_hi: write(o, op, Arg{immz}, V{x}, V{y}, fs(id)...); break;
266+
case Op::store8: write(o, op, Arg{immy} , V{x}, fs(id)...); break;
267+
case Op::store16: write(o, op, Arg{immy} , V{x}, fs(id)...); break;
268+
case Op::store32: write(o, op, Arg{immy} , V{x}, fs(id)...); break;
269+
case Op::store64: write(o, op, Arg{immz} , V{x},V{y}, fs(id)...); break;
270+
case Op::store128: write(o, op, Arg{immz>>1}, V{x},V{y},Hex{immz&1}, fs(id)...); break;
272271

273272
case Op::index: write(o, V{id}, "=", op, fs(id)...); break;
274273

275-
case Op::load8: write(o, V{id}, "=", op, Arg{immy}, fs(id)...); break;
276-
case Op::load16: write(o, V{id}, "=", op, Arg{immy}, fs(id)...); break;
277-
case Op::load32: write(o, V{id}, "=", op, Arg{immy}, fs(id)...); break;
278-
case Op::load64_lo: write(o, V{id}, "=", op, Arg{immy}, fs(id)...); break;
279-
case Op::load64_hi: write(o, V{id}, "=", op, Arg{immy}, fs(id)...); break;
280-
case Op::load128_32: write(o, V{id}, "=", op, Arg{immy}, Arg{immz}, fs(id)...); break;
274+
case Op::load8: write(o, V{id}, "=", op, Arg{immy}, fs(id)...); break;
275+
case Op::load16: write(o, V{id}, "=", op, Arg{immy}, fs(id)...); break;
276+
case Op::load32: write(o, V{id}, "=", op, Arg{immy}, fs(id)...); break;
277+
case Op::load64: write(o, V{id}, "=", op, Arg{immy}, Hex{immz}, fs(id)...); break;
278+
case Op::load128: write(o, V{id}, "=", op, Arg{immy}, Hex{immz}, fs(id)...); break;
281279

282280
case Op::gather8: write(o, V{id}, "=", op, Arg{immy}, Hex{immz}, V{x}, fs(id)...); break;
283281
case Op::gather16: write(o, V{id}, "=", op, Arg{immy}, Hex{immz}, V{x}, fs(id)...); break;
@@ -391,21 +389,19 @@ namespace skvm {
391389
switch (op) {
392390
case Op::assert_true: write(o, op, R{x}, R{y}); break;
393391

394-
case Op::store8: write(o, op, Arg{immy}, R{x}); break;
395-
case Op::store16: write(o, op, Arg{immy}, R{x}); break;
396-
case Op::store32: write(o, op, Arg{immy}, R{x}); break;
397-
case Op::store64: write(o, op, Arg{immz}, R{x}, R{y}); break;
398-
case Op::store128_lo: write(o, op, Arg{immz}, R{x}, R{y}); break;
399-
case Op::store128_hi: write(o, op, Arg{immz}, R{x}, R{y}); break;
392+
case Op::store8: write(o, op, Arg{immy} , R{x} ); break;
393+
case Op::store16: write(o, op, Arg{immy} , R{x} ); break;
394+
case Op::store32: write(o, op, Arg{immy} , R{x} ); break;
395+
case Op::store64: write(o, op, Arg{immz} , R{x}, R{y} ); break;
396+
case Op::store128: write(o, op, Arg{immz>>1}, R{x}, R{y}, Hex{immz&1}); break;
400397

401398
case Op::index: write(o, R{d}, "=", op); break;
402399

403-
case Op::load8: write(o, R{d}, "=", op, Arg{immy}); break;
404-
case Op::load16: write(o, R{d}, "=", op, Arg{immy}); break;
405-
case Op::load32: write(o, R{d}, "=", op, Arg{immy}); break;
406-
case Op::load64_lo: write(o, R{d}, "=", op, Arg{immy}); break;
407-
case Op::load64_hi: write(o, R{d}, "=", op, Arg{immy}); break;
408-
case Op::load128_32: write(o, R{d}, "=", op, Arg{immy}, Arg{immz}); break;
400+
case Op::load8: write(o, R{d}, "=", op, Arg{immy}); break;
401+
case Op::load16: write(o, R{d}, "=", op, Arg{immy}); break;
402+
case Op::load32: write(o, R{d}, "=", op, Arg{immy}); break;
403+
case Op::load64: write(o, R{d}, "=", op, Arg{immy}, Hex{immz}); break;
404+
case Op::load128: write(o, R{d}, "=", op, Arg{immy}, Hex{immz}); break;
409405

410406
case Op::gather8: write(o, R{d}, "=", op, Arg{immy}, Hex{immz}, R{x}); break;
411407
case Op::gather16: write(o, R{d}, "=", op, Arg{immy}, Hex{immz}, R{x}); break;
@@ -695,22 +691,20 @@ namespace skvm {
695691
void Builder::store64(Arg ptr, I32 lo, I32 hi) {
696692
(void)push(Op::store64, lo.id,hi.id,NA, NA,ptr.ix);
697693
}
698-
void Builder::store128_lo(Arg ptr, I32 lo, I32 hi) {
699-
(void)push(Op::store128_lo, lo.id,hi.id,NA, NA,ptr.ix);
700-
}
701-
void Builder::store128_hi(Arg ptr, I32 lo, I32 hi) {
702-
(void)push(Op::store128_hi, lo.id,hi.id,NA, NA,ptr.ix);
694+
void Builder::store128(Arg ptr, I32 lo, I32 hi, int lane) {
695+
(void)push(Op::store128, lo.id,hi.id,NA, NA,(ptr.ix<<1)|(lane&1));
703696
}
704697

705698
I32 Builder::index() { return {this, push(Op::index , NA,NA,NA,0) }; }
706699

707-
I32 Builder::load8 (Arg ptr) { return {this, push(Op::load8 , NA,NA,NA, ptr.ix) }; }
708-
I32 Builder::load16 (Arg ptr) { return {this, push(Op::load16 , NA,NA,NA, ptr.ix) }; }
709-
I32 Builder::load32 (Arg ptr) { return {this, push(Op::load32 , NA,NA,NA, ptr.ix) }; }
710-
I32 Builder::load64_lo (Arg ptr) { return {this, push(Op::load64_lo, NA,NA,NA, ptr.ix) }; }
711-
I32 Builder::load64_hi (Arg ptr) { return {this, push(Op::load64_hi, NA,NA,NA, ptr.ix) }; }
712-
I32 Builder::load128_32(Arg ptr, int lane) {
713-
return {this, push(Op::load128_32, NA,NA,NA, ptr.ix,lane) };
700+
I32 Builder::load8 (Arg ptr) { return {this, push(Op::load8 , NA,NA,NA, ptr.ix) }; }
701+
I32 Builder::load16(Arg ptr) { return {this, push(Op::load16, NA,NA,NA, ptr.ix) }; }
702+
I32 Builder::load32(Arg ptr) { return {this, push(Op::load32, NA,NA,NA, ptr.ix) }; }
703+
I32 Builder::load64(Arg ptr, int lane) {
704+
return {this, push(Op::load64 , NA,NA,NA, ptr.ix,lane) };
705+
}
706+
I32 Builder::load128(Arg ptr, int lane) {
707+
return {this, push(Op::load128, NA,NA,NA, ptr.ix,lane) };
714708
}
715709

716710
I32 Builder::gather8 (Arg ptr, int offset, I32 index) {
@@ -1269,8 +1263,8 @@ namespace skvm {
12691263
case 8: {
12701264
PixelFormat lo,hi;
12711265
split_disjoint_8byte_format(f, &lo,&hi);
1272-
Color l = unpack(lo, load64_lo(ptr)),
1273-
h = unpack(hi, load64_hi(ptr));
1266+
Color l = unpack(lo, load64(ptr, 0)),
1267+
h = unpack(hi, load64(ptr, 1));
12741268
return {
12751269
lo.r_bits ? l.r : h.r,
12761270
lo.g_bits ? l.g : h.g,
@@ -1281,10 +1275,10 @@ namespace skvm {
12811275
case 16: {
12821276
assert_16byte_is_rgba_f32(f);
12831277
return {
1284-
bit_cast(load128_32(ptr, 0)),
1285-
bit_cast(load128_32(ptr, 1)),
1286-
bit_cast(load128_32(ptr, 2)),
1287-
bit_cast(load128_32(ptr, 3)),
1278+
bit_cast(load128(ptr, 0)),
1279+
bit_cast(load128(ptr, 1)),
1280+
bit_cast(load128(ptr, 2)),
1281+
bit_cast(load128(ptr, 3)),
12881282
};
12891283
}
12901284
default: SkUNREACHABLE;
@@ -1366,8 +1360,8 @@ namespace skvm {
13661360
}
13671361
case 16: {
13681362
assert_16byte_is_rgba_f32(f);
1369-
store128_lo(ptr, bit_cast(c.r), bit_cast(c.g));
1370-
store128_hi(ptr, bit_cast(c.b), bit_cast(c.a));
1363+
store128(ptr, bit_cast(c.r), bit_cast(c.g), 0);
1364+
store128(ptr, bit_cast(c.b), bit_cast(c.a), 1);
13711365
return true;
13721366
}
13731367
default: SkUNREACHABLE;
@@ -3426,9 +3420,8 @@ namespace skvm {
34263420
(void)constants[immy];
34273421
break;
34283422

3429-
case Op::store128_lo:
3430-
case Op::store128_hi:
3431-
case Op::load128_32:
3423+
case Op::store128:
3424+
case Op::load128:
34323425
// TODO
34333426
return false;
34343427

@@ -3501,30 +3494,17 @@ namespace skvm {
35013494
else { a->vmovups( dst(), A::Mem{arg[immy]}); }
35023495
break;
35033496

3504-
case Op::load64_lo: if (scalar) {
3505-
a->vmovd((A::Xmm)dst(), A::Mem{arg[immy], 0});
3506-
} else {
3507-
A::Ymm tmp = alloc_tmp();
3508-
a->vmovups(tmp, &load64_index);
3509-
a->vpermps(dst(), tmp, A::Mem{arg[immy], 0});
3510-
a->vpermps( tmp, tmp, A::Mem{arg[immy], 32});
3511-
// Select low 128-bits holding 0,2,4,6 from each.
3512-
a->vperm2f128(dst(), dst(),tmp, 0x20);
3513-
free_tmp(tmp);
3514-
} break;
3515-
3516-
case Op::load64_hi: if (scalar) {
3517-
a->vmovd((A::Xmm)dst(), A::Mem{arg[immy], 4});
3518-
} else {
3519-
A::Ymm tmp = alloc_tmp();
3520-
a->vmovups(tmp, &load64_index);
3521-
a->vpermps(dst(), tmp, A::Mem{arg[immy], 0});
3522-
a->vpermps( tmp, tmp, A::Mem{arg[immy], 32});
3523-
// Select high 128-bits holding 1,3,5,7 from each.
3524-
a->vperm2f128(dst(), dst(),tmp, 0x31);
3525-
free_tmp(tmp);
3526-
} break;
3527-
3497+
case Op::load64: if (scalar) {
3498+
a->vmovd((A::Xmm)dst(), A::Mem{arg[immy], 4*immz});
3499+
} else {
3500+
A::Ymm tmp = alloc_tmp();
3501+
a->vmovups(tmp, &load64_index);
3502+
a->vpermps(dst(), tmp, A::Mem{arg[immy], 0});
3503+
a->vpermps( tmp, tmp, A::Mem{arg[immy], 32});
3504+
// Low 128 bits holds immz=0 lanes, high 128 bits holds immz=1.
3505+
a->vperm2f128(dst(), dst(),tmp, immz ? 0x31 : 0x20);
3506+
free_tmp(tmp);
3507+
} break;
35283508

35293509
case Op::gather8: {
35303510
// As usual, the gather base pointer is immz bytes off of uniform immy.

src/core/SkVM.h

Lines changed: 49 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -370,36 +370,36 @@ namespace skvm {
370370
int disp19(Label*);
371371
};
372372

373-
// Order matters a little: Ops <=store128_hi are treated as having side effects.
374-
#define SKVM_OPS(M) \
375-
M(assert_true) \
376-
M(store8) M(store16) M(store32) M(store64) M(store128_lo) M(store128_hi) \
377-
M(index) \
378-
M(load8) M(load16) M(load32) M(load64_lo) M(load64_hi) M(load128_32) \
379-
M(gather8) M(gather16) M(gather32) \
380-
M(uniform8) M(uniform16) M(uniform32) \
381-
M(splat) \
382-
M(add_f32) M(add_i32) \
383-
M(sub_f32) M(sub_i32) \
384-
M(mul_f32) M(mul_i32) \
385-
M(div_f32) \
386-
M(min_f32) \
387-
M(max_f32) \
388-
M(fma_f32) M(fms_f32) M(fnma_f32) \
389-
M(sqrt_f32) \
390-
M(shl_i32) M(shr_i32) M(sra_i32) \
391-
M(ceil) M(floor) \
392-
M(trunc) M(round) M(to_half) M(from_half) \
393-
M(to_f32) \
394-
M( eq_f32) M( eq_i32) \
395-
M(neq_f32) \
396-
M( gt_f32) M( gt_i32) \
397-
M(gte_f32) \
398-
M(bit_and) \
399-
M(bit_or) \
400-
M(bit_xor) \
401-
M(bit_clear) \
402-
M(select) M(pack) \
373+
// Order matters a little: Ops <=store128 are treated as having side effects.
374+
#define SKVM_OPS(M) \
375+
M(assert_true) \
376+
M(store8) M(store16) M(store32) M(store64) M(store128) \
377+
M(index) \
378+
M(load8) M(load16) M(load32) M(load64) M(load128) \
379+
M(gather8) M(gather16) M(gather32) \
380+
M(uniform8) M(uniform16) M(uniform32) \
381+
M(splat) \
382+
M(add_f32) M(add_i32) \
383+
M(sub_f32) M(sub_i32) \
384+
M(mul_f32) M(mul_i32) \
385+
M(div_f32) \
386+
M(min_f32) \
387+
M(max_f32) \
388+
M(fma_f32) M(fms_f32) M(fnma_f32) \
389+
M(sqrt_f32) \
390+
M(shl_i32) M(shr_i32) M(sra_i32) \
391+
M(ceil) M(floor) \
392+
M(trunc) M(round) M(to_half) M(from_half) \
393+
M(to_f32) \
394+
M( eq_f32) M( eq_i32) \
395+
M(neq_f32) \
396+
M( gt_f32) M( gt_i32) \
397+
M(gte_f32) \
398+
M(bit_and) \
399+
M(bit_or) \
400+
M(bit_xor) \
401+
M(bit_clear) \
402+
M(select) M(pack) \
403403
// End of SKVM_OPS
404404

405405
enum class Op : int {
@@ -409,7 +409,7 @@ namespace skvm {
409409
};
410410

411411
static inline bool has_side_effect(Op op) {
412-
return op <= Op::store128_hi;
412+
return op <= Op::store128;
413413
}
414414
static inline bool is_always_varying(Op op) {
415415
return op <= Op::gather32 && op != Op::assert_true;
@@ -573,25 +573,23 @@ namespace skvm {
573573
void assert_true(I32 cond) { assert_true(cond, cond); }
574574

575575
// Store {8,16,32,64,128}-bit varying.
576-
void store8 (Arg ptr, I32 val);
577-
void store16 (Arg ptr, I32 val);
578-
void store32 (Arg ptr, I32 val);
579-
void storeF (Arg ptr, F32 val) { store32(ptr, bit_cast(val)); }
580-
void store64 (Arg ptr, I32 lo, I32 hi);
581-
void store128_lo(Arg ptr, I32 lo, I32 hi); // Low half of 128-bit lane = lo|(hi<<32).
582-
void store128_hi(Arg ptr, I32 lo, I32 hi); // High half of 128-bit lane = lo|(hi<<32).
576+
void store8 (Arg ptr, I32 val);
577+
void store16 (Arg ptr, I32 val);
578+
void store32 (Arg ptr, I32 val);
579+
void storeF (Arg ptr, F32 val) { store32(ptr, bit_cast(val)); }
580+
void store64 (Arg ptr, I32 lo, I32 hi); // *ptr = lo|(hi<<32)
581+
void store128(Arg ptr, I32 lo, I32 hi, int lane); // 64-bit lane 0-1 at ptr = lo|(hi<<32).
583582

584583
// Returns varying {n, n-1, n-2, ..., 1}, where n is the argument to Program::eval().
585584
I32 index();
586585

587586
// Load {8,16,32,64,128}-bit varying.
588-
I32 load8 (Arg ptr);
589-
I32 load16 (Arg ptr);
590-
I32 load32 (Arg ptr);
591-
F32 loadF (Arg ptr) { return bit_cast(load32(ptr)); }
592-
I32 load64_lo (Arg ptr);
593-
I32 load64_hi (Arg ptr);
594-
I32 load128_32(Arg ptr, int lane); // Load 32-bit lane 0-3 of 128-bit lane.
587+
I32 load8 (Arg ptr);
588+
I32 load16 (Arg ptr);
589+
I32 load32 (Arg ptr);
590+
F32 loadF (Arg ptr) { return bit_cast(load32(ptr)); }
591+
I32 load64 (Arg ptr, int lane); // Load 32-bit lane 0-1 of 64-bit value.
592+
I32 load128(Arg ptr, int lane); // Load 32-bit lane 0-3 of 128-bit value.
595593

596594
// Load u8,u16,i32 uniform with byte-count offset.
597595
I32 uniform8 (Arg ptr, int offset);
@@ -975,13 +973,12 @@ namespace skvm {
975973
static inline void assert_true(I32 cond, F32 debug) { cond->assert_true(cond,debug); }
976974
static inline void assert_true(I32 cond) { cond->assert_true(cond); }
977975

978-
static inline void store8 (Arg ptr, I32 val) { val->store8 (ptr, val); }
979-
static inline void store16 (Arg ptr, I32 val) { val->store16 (ptr, val); }
980-
static inline void store32 (Arg ptr, I32 val) { val->store32 (ptr, val); }
981-
static inline void storeF (Arg ptr, F32 val) { val->storeF (ptr, val); }
982-
static inline void store64 (Arg ptr, I32 lo, I32 hi) { lo ->store64 (ptr, lo,hi); }
983-
static inline void store128_lo(Arg ptr, I32 lo, I32 hi) { lo ->store128_lo(ptr, lo,hi); }
984-
static inline void store128_hi(Arg ptr, I32 lo, I32 hi) { lo ->store128_hi(ptr, lo,hi); }
976+
static inline void store8 (Arg ptr, I32 val) { val->store8 (ptr, val); }
977+
static inline void store16 (Arg ptr, I32 val) { val->store16 (ptr, val); }
978+
static inline void store32 (Arg ptr, I32 val) { val->store32 (ptr, val); }
979+
static inline void storeF (Arg ptr, F32 val) { val->storeF (ptr, val); }
980+
static inline void store64 (Arg ptr, I32 lo, I32 hi) { lo ->store64 (ptr, lo,hi); }
981+
static inline void store128(Arg ptr, I32 lo, I32 hi, int ix) { lo ->store128(ptr, lo,hi, ix); }
985982

986983
static inline I32 gather8 (Arg ptr, int off, I32 ix) { return ix->gather8 (ptr, off, ix); }
987984
static inline I32 gather16(Arg ptr, int off, I32 ix) { return ix->gather16(ptr, off, ix); }

0 commit comments

Comments
 (0)