@@ -263,21 +263,19 @@ namespace skvm {
263263 switch (op) {
264264 case Op::assert_true: write (o, op, V{x}, V{y}, fs (id)...); break ;
265265
266- case Op::store8: write (o, op, Arg{immy}, V{x}, fs (id)...); break ;
267- case Op::store16: write (o, op, Arg{immy}, V{x}, fs (id)...); break ;
268- case Op::store32: write (o, op, Arg{immy}, V{x}, fs (id)...); break ;
269- case Op::store64: write (o, op, Arg{immz}, V{x}, V{y}, fs (id)...); break ;
270- case Op::store128_lo: write (o, op, Arg{immz}, V{x}, V{y}, fs (id)...); break ;
271- case Op::store128_hi: write (o, op, Arg{immz}, V{x}, V{y}, fs (id)...); break ;
266+ case Op::store8: write (o, op, Arg{immy} , V{x}, fs (id)...); break ;
267+ case Op::store16: write (o, op, Arg{immy} , V{x}, fs (id)...); break ;
268+ case Op::store32: write (o, op, Arg{immy} , V{x}, fs (id)...); break ;
269+ case Op::store64: write (o, op, Arg{immz} , V{x},V{y}, fs (id)...); break ;
270+ case Op::store128: write (o, op, Arg{immz>>1 }, V{x},V{y},Hex{immz&1 }, fs (id)...); break ;
272271
273272 case Op::index: write (o, V{id}, " =" , op, fs (id)...); break ;
274273
275- case Op::load8: write (o, V{id}, " =" , op, Arg{immy}, fs (id)...); break ;
276- case Op::load16: write (o, V{id}, " =" , op, Arg{immy}, fs (id)...); break ;
277- case Op::load32: write (o, V{id}, " =" , op, Arg{immy}, fs (id)...); break ;
278- case Op::load64_lo: write (o, V{id}, " =" , op, Arg{immy}, fs (id)...); break ;
279- case Op::load64_hi: write (o, V{id}, " =" , op, Arg{immy}, fs (id)...); break ;
280- case Op::load128_32: write (o, V{id}, " =" , op, Arg{immy}, Arg{immz}, fs (id)...); break ;
274+ case Op::load8: write (o, V{id}, " =" , op, Arg{immy}, fs (id)...); break ;
275+ case Op::load16: write (o, V{id}, " =" , op, Arg{immy}, fs (id)...); break ;
276+ case Op::load32: write (o, V{id}, " =" , op, Arg{immy}, fs (id)...); break ;
277+ case Op::load64: write (o, V{id}, " =" , op, Arg{immy}, Hex{immz}, fs (id)...); break ;
278+ case Op::load128: write (o, V{id}, " =" , op, Arg{immy}, Hex{immz}, fs (id)...); break ;
281279
282280 case Op::gather8: write (o, V{id}, " =" , op, Arg{immy}, Hex{immz}, V{x}, fs (id)...); break ;
283281 case Op::gather16: write (o, V{id}, " =" , op, Arg{immy}, Hex{immz}, V{x}, fs (id)...); break ;
@@ -391,21 +389,19 @@ namespace skvm {
391389 switch (op) {
392390 case Op::assert_true: write (o, op, R{x}, R{y}); break ;
393391
394- case Op::store8: write (o, op, Arg{immy}, R{x}); break ;
395- case Op::store16: write (o, op, Arg{immy}, R{x}); break ;
396- case Op::store32: write (o, op, Arg{immy}, R{x}); break ;
397- case Op::store64: write (o, op, Arg{immz}, R{x}, R{y}); break ;
398- case Op::store128_lo: write (o, op, Arg{immz}, R{x}, R{y}); break ;
399- case Op::store128_hi: write (o, op, Arg{immz}, R{x}, R{y}); break ;
392+ case Op::store8: write (o, op, Arg{immy} , R{x} ); break ;
393+ case Op::store16: write (o, op, Arg{immy} , R{x} ); break ;
394+ case Op::store32: write (o, op, Arg{immy} , R{x} ); break ;
395+ case Op::store64: write (o, op, Arg{immz} , R{x}, R{y} ); break ;
396+ case Op::store128: write (o, op, Arg{immz>>1 }, R{x}, R{y}, Hex{immz&1 }); break ;
400397
401398 case Op::index: write (o, R{d}, " =" , op); break ;
402399
403- case Op::load8: write (o, R{d}, " =" , op, Arg{immy}); break ;
404- case Op::load16: write (o, R{d}, " =" , op, Arg{immy}); break ;
405- case Op::load32: write (o, R{d}, " =" , op, Arg{immy}); break ;
406- case Op::load64_lo: write (o, R{d}, " =" , op, Arg{immy}); break ;
407- case Op::load64_hi: write (o, R{d}, " =" , op, Arg{immy}); break ;
408- case Op::load128_32: write (o, R{d}, " =" , op, Arg{immy}, Arg{immz}); break ;
400+ case Op::load8: write (o, R{d}, " =" , op, Arg{immy}); break ;
401+ case Op::load16: write (o, R{d}, " =" , op, Arg{immy}); break ;
402+ case Op::load32: write (o, R{d}, " =" , op, Arg{immy}); break ;
403+ case Op::load64: write (o, R{d}, " =" , op, Arg{immy}, Hex{immz}); break ;
404+ case Op::load128: write (o, R{d}, " =" , op, Arg{immy}, Hex{immz}); break ;
409405
410406 case Op::gather8: write (o, R{d}, " =" , op, Arg{immy}, Hex{immz}, R{x}); break ;
411407 case Op::gather16: write (o, R{d}, " =" , op, Arg{immy}, Hex{immz}, R{x}); break ;
@@ -695,22 +691,20 @@ namespace skvm {
// Builder::store64: pushes an Op::store64 instruction with lo.id and hi.id as
// its first two operands and ptr.ix as the trailing immediate (the dump code
// in this diff prints that slot as Arg{immz}). The id returned by push() is
// explicitly discarded via the (void) cast.
695691 void Builder::store64 (Arg ptr, I32 lo, I32 hi) {
696692 (void )push (Op::store64, lo.id ,hi.id ,NA, NA,ptr.ix );
697693 }
// Removed by this change: the two separate builders store128_lo / store128_hi,
// each of which pushed its own op with ptr.ix as the trailing immediate.
698- void Builder::store128_lo (Arg ptr, I32 lo, I32 hi) {
699- (void )push (Op::store128_lo, lo.id ,hi.id ,NA, NA,ptr.ix );
700- }
701- void Builder::store128_hi (Arg ptr, I32 lo, I32 hi) {
702- (void )push (Op::store128_hi, lo.id ,hi.id ,NA, NA,ptr.ix );
// Added by this change: a single store128 taking an extra `lane` parameter.
// The argument index and the lane are packed into one immediate as
// (ptr.ix << 1) | (lane & 1): only the low bit of `lane` is kept, and the dump
// code in this diff unpacks the pair again as immz>>1 and immz&1.
// Callers visible in this diff pass lane 0 for (r,g) and lane 1 for (b,a).
694+ void Builder::store128 (Arg ptr, I32 lo, I32 hi, int lane) {
695+ (void )push (Op::store128, lo.id ,hi.id ,NA, NA,(ptr.ix <<1 )|(lane&1 ));
703696 }
704697
// Builder::index: pushes an Op::index instruction (three NA operands, trailing
// argument 0) and returns the resulting id wrapped in an I32 bound to this
// builder.
705698 I32 Builder::index () { return {this , push (Op::index , NA,NA,NA,0 ) }; }
706699
// Removed by this change: the previous load builders. load64_lo / load64_hi
// were distinct ops taking only the pointer; load128_32 took a lane as well.
707- I32 Builder::load8 (Arg ptr) { return {this , push (Op::load8 , NA,NA,NA, ptr.ix ) }; }
708- I32 Builder::load16 (Arg ptr) { return {this , push (Op::load16 , NA,NA,NA, ptr.ix ) }; }
709- I32 Builder::load32 (Arg ptr) { return {this , push (Op::load32 , NA,NA,NA, ptr.ix ) }; }
710- I32 Builder::load64_lo (Arg ptr) { return {this , push (Op::load64_lo, NA,NA,NA, ptr.ix ) }; }
711- I32 Builder::load64_hi (Arg ptr) { return {this , push (Op::load64_hi, NA,NA,NA, ptr.ix ) }; }
712- I32 Builder::load128_32 (Arg ptr, int lane) {
713- return {this , push (Op::load128_32, NA,NA,NA, ptr.ix ,lane) };
// Added by this change: load8/16/32 are re-emitted with only whitespace
// differences; each pushes its op with ptr.ix as the sole immediate and
// returns the new id wrapped in an I32 bound to this builder.
700+ I32 Builder::load8 (Arg ptr) { return {this , push (Op::load8 , NA,NA,NA, ptr.ix ) }; }
701+ I32 Builder::load16 (Arg ptr) { return {this , push (Op::load16, NA,NA,NA, ptr.ix ) }; }
702+ I32 Builder::load32 (Arg ptr) { return {this , push (Op::load32, NA,NA,NA, ptr.ix ) }; }
// load64 replaces load64_lo/load64_hi: `lane` is forwarded as a second
// immediate after ptr.ix (the dump code in this diff prints it as Hex{immz}).
// Callers visible in this diff pass lane 0 and 1.
// NOTE(review): unlike store128, `lane` is forwarded unmasked here -- confirm
// that every caller keeps it within the expected range.
703+ I32 Builder::load64 (Arg ptr, int lane) {
704+ return {this , push (Op::load64 , NA,NA,NA, ptr.ix ,lane) };
705+ }
// load128 replaces load128_32 with the same argument layout: ptr.ix followed
// by `lane`. Callers visible in this diff pass lanes 0 through 3.
706+ I32 Builder::load128 (Arg ptr, int lane) {
707+ return {this , push (Op::load128, NA,NA,NA, ptr.ix ,lane) };
708708 }
715709
716710 I32 Builder::gather8 (Arg ptr, int offset, I32 index) {
@@ -1269,8 +1263,8 @@ namespace skvm {
12691263 case 8 : {
12701264 PixelFormat lo,hi;
12711265 split_disjoint_8byte_format (f, &lo,&hi);
1272- Color l = unpack (lo, load64_lo (ptr)),
1273- h = unpack (hi, load64_hi (ptr));
1266+ Color l = unpack (lo, load64 (ptr, 0 )),
1267+ h = unpack (hi, load64 (ptr, 1 ));
12741268 return {
12751269 lo.r_bits ? l.r : h.r ,
12761270 lo.g_bits ? l.g : h.g ,
@@ -1281,10 +1275,10 @@ namespace skvm {
12811275 case 16 : {
12821276 assert_16byte_is_rgba_f32 (f);
12831277 return {
1284- bit_cast (load128_32 (ptr, 0 )),
1285- bit_cast (load128_32 (ptr, 1 )),
1286- bit_cast (load128_32 (ptr, 2 )),
1287- bit_cast (load128_32 (ptr, 3 )),
1278+ bit_cast (load128 (ptr, 0 )),
1279+ bit_cast (load128 (ptr, 1 )),
1280+ bit_cast (load128 (ptr, 2 )),
1281+ bit_cast (load128 (ptr, 3 )),
12881282 };
12891283 }
12901284 default : SkUNREACHABLE;
@@ -1366,8 +1360,8 @@ namespace skvm {
13661360 }
13671361 case 16 : {
13681362 assert_16byte_is_rgba_f32 (f);
1369- store128_lo (ptr, bit_cast (c.r ), bit_cast (c.g ));
1370- store128_hi (ptr, bit_cast (c.b ), bit_cast (c.a ));
1363+ store128 (ptr, bit_cast (c.r ), bit_cast (c.g ), 0 );
1364+ store128 (ptr, bit_cast (c.b ), bit_cast (c.a ), 1 );
13711365 return true ;
13721366 }
13731367 default : SkUNREACHABLE;
@@ -3426,9 +3420,8 @@ namespace skvm {
34263420 (void )constants[immy];
34273421 break ;
34283422
3429- case Op::store128_lo:
3430- case Op::store128_hi:
3431- case Op::load128_32:
3423+ case Op::store128:
3424+ case Op::load128:
34323425 // TODO
34333426 return false ;
34343427
@@ -3501,30 +3494,17 @@ namespace skvm {
35013494 else { a->vmovups ( dst (), A::Mem{arg[immy]}); }
35023495 break ;
35033496
3504- case Op::load64_lo: if (scalar) {
3505- a->vmovd ((A::Xmm)dst (), A::Mem{arg[immy], 0 });
3506- } else {
3507- A::Ymm tmp = alloc_tmp ();
3508- a->vmovups (tmp, &load64_index);
3509- a->vpermps (dst (), tmp, A::Mem{arg[immy], 0 });
3510- a->vpermps ( tmp, tmp, A::Mem{arg[immy], 32 });
3511- // Select low 128-bits holding 0,2,4,6 from each.
3512- a->vperm2f128 (dst (), dst (),tmp, 0x20 );
3513- free_tmp (tmp);
3514- } break ;
3515-
3516- case Op::load64_hi: if (scalar) {
3517- a->vmovd ((A::Xmm)dst (), A::Mem{arg[immy], 4 });
3518- } else {
3519- A::Ymm tmp = alloc_tmp ();
3520- a->vmovups (tmp, &load64_index);
3521- a->vpermps (dst (), tmp, A::Mem{arg[immy], 0 });
3522- a->vpermps ( tmp, tmp, A::Mem{arg[immy], 32 });
3523- // Select high 128-bits holding 1,3,5,7 from each.
3524- a->vperm2f128 (dst (), dst (),tmp, 0x31 );
3525- free_tmp (tmp);
3526- } break ;
3527-
3497+ case Op::load64: if (scalar) {
3498+ a->vmovd ((A::Xmm)dst (), A::Mem{arg[immy], 4 *immz});
3499+ } else {
3500+ A::Ymm tmp = alloc_tmp ();
3501+ a->vmovups (tmp, &load64_index);
3502+ a->vpermps (dst (), tmp, A::Mem{arg[immy], 0 });
3503+ a->vpermps ( tmp, tmp, A::Mem{arg[immy], 32 });
3504+ // Low 128 bits holds immz=0 lanes, high 128 bits holds immz=1.
3505+ a->vperm2f128 (dst (), dst (),tmp, immz ? 0x31 : 0x20 );
3506+ free_tmp (tmp);
3507+ } break ;
35283508
35293509 case Op::gather8: {
35303510 // As usual, the gather base pointer is immz bytes off of uniform immy.
0 commit comments