@@ -323,7 +323,7 @@ public ByteVector unary(ByteVector xVec, int vectorOpcode) {
323
323
case Bytecode .VECTOR_V128_NOT -> unop (x , I8X16 , VectorOperators .NOT );
324
324
case Bytecode .VECTOR_I8X16_ABS -> unop (x , I8X16 , VectorOperators .ABS );
325
325
case Bytecode .VECTOR_I8X16_NEG -> unop (x , I8X16 , VectorOperators .NEG );
326
- case Bytecode .VECTOR_I8X16_POPCNT -> unop ( x , I8X16 , VectorOperators . BIT_COUNT );
326
+ case Bytecode .VECTOR_I8X16_POPCNT -> i8x16_popcnt ( x ); // GR-68892
327
327
case Bytecode .VECTOR_I16X8_EXTADD_PAIRWISE_I8X16_S -> extadd_pairwise (x , I8X16 , VectorOperators .B2S );
328
328
case Bytecode .VECTOR_I16X8_EXTADD_PAIRWISE_I8X16_U -> extadd_pairwise (x , I8X16 , VectorOperators .ZERO_EXTEND_B2S );
329
329
case Bytecode .VECTOR_I16X8_EXTEND_LOW_I8X16_S -> extend (x , 0 , I8X16 , VectorOperators .B2S );
@@ -366,16 +366,16 @@ public ByteVector unary(ByteVector xVec, int vectorOpcode) {
366
366
case Bytecode .VECTOR_F64X2_TRUNC -> trunc (x , F64X2 , I64X2 , VectorOperators .REINTERPRET_D2L , VectorOperators .REINTERPRET_L2D ,
367
367
Vector128OpsVectorAPI ::getExponentDoubles , DOUBLE_SIGNIFICAND_WIDTH , I64X2 .broadcast (DOUBLE_SIGNIF_BIT_MASK ));
368
368
case Bytecode .VECTOR_F64X2_NEAREST -> nearest (x , F64X2 , 1L << (DOUBLE_SIGNIFICAND_WIDTH - 1 ));
369
- case Bytecode .VECTOR_I32X4_TRUNC_SAT_F32X4_S , Bytecode .VECTOR_I32X4_RELAXED_TRUNC_F32X4_S -> I8X16 . species (). fromArray (fallbackOps .unary (x .toArray (), vectorOpcode ), 0 ); // GR-51421
370
- case Bytecode .VECTOR_I32X4_TRUNC_SAT_F32X4_U , Bytecode .VECTOR_I32X4_RELAXED_TRUNC_F32X4_U -> I8X16 . species (). fromArray (fallbackOps .unary (x .toArray (), vectorOpcode ), 0 ); // GR-51421
369
+ case Bytecode .VECTOR_I32X4_TRUNC_SAT_F32X4_S , Bytecode .VECTOR_I32X4_RELAXED_TRUNC_F32X4_S -> fromArray (fallbackOps .unary (x .toArray (), vectorOpcode )); // GR-51421
370
+ case Bytecode .VECTOR_I32X4_TRUNC_SAT_F32X4_U , Bytecode .VECTOR_I32X4_RELAXED_TRUNC_F32X4_U -> fromArray (fallbackOps .unary (x .toArray (), vectorOpcode )); // GR-51421
371
371
case Bytecode .VECTOR_F32X4_CONVERT_I32X4_S -> convert (x , I32X4 , VectorOperators .I2F );
372
- case Bytecode .VECTOR_F32X4_CONVERT_I32X4_U -> f32x4_convert_i32x4_u ( x );
373
- case Bytecode .VECTOR_I32X4_TRUNC_SAT_F64X2_S_ZERO , Bytecode .VECTOR_I32X4_RELAXED_TRUNC_F64X2_S_ZERO -> I8X16 . species (). fromArray (fallbackOps .unary (x .toArray (), vectorOpcode ), 0 ); // GR-51421
374
- case Bytecode .VECTOR_I32X4_TRUNC_SAT_F64X2_U_ZERO , Bytecode .VECTOR_I32X4_RELAXED_TRUNC_F64X2_U_ZERO -> I8X16 . species (). fromArray (fallbackOps .unary (x .toArray (), vectorOpcode ), 0 ); // GR-51421
372
+ case Bytecode .VECTOR_F32X4_CONVERT_I32X4_U -> fromArray ( fallbackOps . unary ( x . toArray (), vectorOpcode )); // GR-68843
373
+ case Bytecode .VECTOR_I32X4_TRUNC_SAT_F64X2_S_ZERO , Bytecode .VECTOR_I32X4_RELAXED_TRUNC_F64X2_S_ZERO -> fromArray (fallbackOps .unary (x .toArray (), vectorOpcode )); // GR-51421
374
+ case Bytecode .VECTOR_I32X4_TRUNC_SAT_F64X2_U_ZERO , Bytecode .VECTOR_I32X4_RELAXED_TRUNC_F64X2_U_ZERO -> fromArray (fallbackOps .unary (x .toArray (), vectorOpcode )); // GR-51421
375
375
case Bytecode .VECTOR_F64X2_CONVERT_LOW_I32X4_S -> convert (x , I32X4 , VectorOperators .I2D );
376
376
case Bytecode .VECTOR_F64X2_CONVERT_LOW_I32X4_U -> f64x2_convert_low_i32x4_u (x );
377
- case Bytecode .VECTOR_F32X4_DEMOTE_F64X2_ZERO -> f32X4_demote_f64X2_zero ( x );
378
- case Bytecode .VECTOR_F64X2_PROMOTE_LOW_F32X4 -> convert ( x , F32X4 , VectorOperators . F2D );
377
+ case Bytecode .VECTOR_F32X4_DEMOTE_F64X2_ZERO -> fromArray ( fallbackOps . unary ( x . toArray (), vectorOpcode )); // GR-68843
378
+ case Bytecode .VECTOR_F64X2_PROMOTE_LOW_F32X4 -> fromArray ( fallbackOps . unary ( x . toArray (), vectorOpcode )); // GR-68843
379
379
default -> throw CompilerDirectives .shouldNotReachHere ();
380
380
});
381
381
}
@@ -441,30 +441,30 @@ public ByteVector binary(ByteVector xVec, ByteVector yVec, int vectorOpcode) {
441
441
case Bytecode .VECTOR_I8X16_NARROW_I16X8_S -> narrow (x , y , I16X8 , I8X16 , Byte .MIN_VALUE , Byte .MAX_VALUE );
442
442
case Bytecode .VECTOR_I8X16_NARROW_I16X8_U -> narrow (x , y , I16X8 , I8X16 , (short ) 0 , (short ) 0xff );
443
443
case Bytecode .VECTOR_I8X16_ADD -> binop (x , y , I8X16 , VectorOperators .ADD );
444
- case Bytecode .VECTOR_I8X16_ADD_SAT_S -> binop (x , y , I8X16 , VectorOperators .SADD );
445
- case Bytecode .VECTOR_I8X16_ADD_SAT_U -> binop_sat_u (x , y , I8X16 , I16X8 , VectorOperators .ZERO_EXTEND_B2S , VectorOperators .ADD , 0 , 0xff );
444
+ case Bytecode .VECTOR_I8X16_ADD_SAT_S -> binop_sat (x , y , I8X16 , I16X8 , VectorOperators .B2S , VectorOperators . ADD , Byte . MIN_VALUE , Byte . MAX_VALUE ); // GR-68891
445
+ case Bytecode .VECTOR_I8X16_ADD_SAT_U -> binop_sat (x , y , I8X16 , I16X8 , VectorOperators .ZERO_EXTEND_B2S , VectorOperators .ADD , 0 , 0xff ); // GR-68891
446
446
case Bytecode .VECTOR_I8X16_SUB -> binop (x , y , I8X16 , VectorOperators .SUB );
447
- case Bytecode .VECTOR_I8X16_SUB_SAT_S -> binop (x , y , I8X16 , VectorOperators .SSUB );
448
- case Bytecode .VECTOR_I8X16_SUB_SAT_U -> binop_sat_u (x , y , I8X16 , I16X8 , VectorOperators .ZERO_EXTEND_B2S , VectorOperators .SUB , 0 , 0xff );
447
+ case Bytecode .VECTOR_I8X16_SUB_SAT_S -> binop_sat (x , y , I8X16 , I16X8 , VectorOperators .B2S , VectorOperators . SUB , Byte . MIN_VALUE , Byte . MAX_VALUE ); // GR-68891
448
+ case Bytecode .VECTOR_I8X16_SUB_SAT_U -> binop_sat (x , y , I8X16 , I16X8 , VectorOperators .ZERO_EXTEND_B2S , VectorOperators .SUB , 0 , 0xff ); // GR-68891
449
449
case Bytecode .VECTOR_I8X16_MIN_S -> binop (x , y , I8X16 , VectorOperators .MIN );
450
- case Bytecode .VECTOR_I8X16_MIN_U -> binop ( x , y , I8X16 , VectorOperators . UMIN );
450
+ case Bytecode .VECTOR_I8X16_MIN_U -> fromArray ( fallbackOps . binary ( x . toArray () , y . toArray (), vectorOpcode )); // GR-68891
451
451
case Bytecode .VECTOR_I8X16_MAX_S -> binop (x , y , I8X16 , VectorOperators .MAX );
452
- case Bytecode .VECTOR_I8X16_MAX_U -> binop ( x , y , I8X16 , VectorOperators . UMAX );
452
+ case Bytecode .VECTOR_I8X16_MAX_U -> fromArray ( fallbackOps . binary ( x . toArray () , y . toArray (), vectorOpcode )); // GR-68891
453
453
case Bytecode .VECTOR_I8X16_AVGR_U -> avgr_u (x , y , I8X16 , I16X8 , VectorOperators .ZERO_EXTEND_B2S );
454
454
case Bytecode .VECTOR_I16X8_NARROW_I32X4_S -> narrow (x , y , I32X4 , I16X8 , Short .MIN_VALUE , Short .MAX_VALUE );
455
455
case Bytecode .VECTOR_I16X8_NARROW_I32X4_U -> narrow (x , y , I32X4 , I16X8 , 0 , 0xffff );
456
456
case Bytecode .VECTOR_I16X8_Q15MULR_SAT_S , Bytecode .VECTOR_I16X8_RELAXED_Q15MULR_S -> i16x8_q15mulr_sat_s (x , y );
457
457
case Bytecode .VECTOR_I16X8_ADD -> binop (x , y , I16X8 , VectorOperators .ADD );
458
- case Bytecode .VECTOR_I16X8_ADD_SAT_S -> binop (x , y , I16X8 , VectorOperators .SADD );
459
- case Bytecode .VECTOR_I16X8_ADD_SAT_U -> binop_sat_u (x , y , I16X8 , I32X4 , VectorOperators .ZERO_EXTEND_S2I , VectorOperators .ADD , 0 , 0xffff );
458
+ case Bytecode .VECTOR_I16X8_ADD_SAT_S -> binop_sat (x , y , I16X8 , I32X4 , VectorOperators .S2I , VectorOperators . ADD , Short . MIN_VALUE , Short . MAX_VALUE ); // GR-68891
459
+ case Bytecode .VECTOR_I16X8_ADD_SAT_U -> binop_sat (x , y , I16X8 , I32X4 , VectorOperators .ZERO_EXTEND_S2I , VectorOperators .ADD , 0 , 0xffff ); // GR-68891
460
460
case Bytecode .VECTOR_I16X8_SUB -> binop (x , y , I16X8 , VectorOperators .SUB );
461
- case Bytecode .VECTOR_I16X8_SUB_SAT_S -> binop (x , y , I16X8 , VectorOperators .SSUB );
462
- case Bytecode .VECTOR_I16X8_SUB_SAT_U -> binop_sat_u (x , y , I16X8 , I32X4 , VectorOperators .ZERO_EXTEND_S2I , VectorOperators .SUB , 0 , 0xffff );
461
+ case Bytecode .VECTOR_I16X8_SUB_SAT_S -> binop_sat (x , y , I16X8 , I32X4 , VectorOperators .S2I , VectorOperators . SUB , Short . MIN_VALUE , Short . MAX_VALUE ); // GR-68891
462
+ case Bytecode .VECTOR_I16X8_SUB_SAT_U -> binop_sat (x , y , I16X8 , I32X4 , VectorOperators .ZERO_EXTEND_S2I , VectorOperators .SUB , 0 , 0xffff ); // GR-68891
463
463
case Bytecode .VECTOR_I16X8_MUL -> binop (x , y , I16X8 , VectorOperators .MUL );
464
464
case Bytecode .VECTOR_I16X8_MIN_S -> binop (x , y , I16X8 , VectorOperators .MIN );
465
- case Bytecode .VECTOR_I16X8_MIN_U -> binop ( x , y , I16X8 , VectorOperators . UMIN );
465
+ case Bytecode .VECTOR_I16X8_MIN_U -> fromArray ( fallbackOps . binary ( x . toArray () , y . toArray (), vectorOpcode )); // GR-68891
466
466
case Bytecode .VECTOR_I16X8_MAX_S -> binop (x , y , I16X8 , VectorOperators .MAX );
467
- case Bytecode .VECTOR_I16X8_MAX_U -> binop ( x , y , I16X8 , VectorOperators . UMAX );
467
+ case Bytecode .VECTOR_I16X8_MAX_U -> fromArray ( fallbackOps . binary ( x . toArray () , y . toArray (), vectorOpcode )); // GR-68891
468
468
case Bytecode .VECTOR_I16X8_AVGR_U -> avgr_u (x , y , I16X8 , I32X4 , VectorOperators .ZERO_EXTEND_S2I );
469
469
case Bytecode .VECTOR_I16X8_EXTMUL_LOW_I8X16_S -> extmul (x , y , I8X16 , VectorOperators .B2S , 0 );
470
470
case Bytecode .VECTOR_I16X8_EXTMUL_LOW_I8X16_U -> extmul (x , y , I8X16 , VectorOperators .ZERO_EXTEND_B2S , 0 );
@@ -474,9 +474,9 @@ public ByteVector binary(ByteVector xVec, ByteVector yVec, int vectorOpcode) {
474
474
case Bytecode .VECTOR_I32X4_SUB -> binop (x , y , I32X4 , VectorOperators .SUB );
475
475
case Bytecode .VECTOR_I32X4_MUL -> binop (x , y , I32X4 , VectorOperators .MUL );
476
476
case Bytecode .VECTOR_I32X4_MIN_S -> binop (x , y , I32X4 , VectorOperators .MIN );
477
- case Bytecode .VECTOR_I32X4_MIN_U -> binop ( x , y , I32X4 , VectorOperators . UMIN );
477
+ case Bytecode .VECTOR_I32X4_MIN_U -> fromArray ( fallbackOps . binary ( x . toArray () , y . toArray (), vectorOpcode )); // GR-68891
478
478
case Bytecode .VECTOR_I32X4_MAX_S -> binop (x , y , I32X4 , VectorOperators .MAX );
479
- case Bytecode .VECTOR_I32X4_MAX_U -> binop ( x , y , I32X4 , VectorOperators . UMAX );
479
+ case Bytecode .VECTOR_I32X4_MAX_U -> fromArray ( fallbackOps . binary ( x . toArray () , y . toArray (), vectorOpcode )); // GR-68891
480
480
case Bytecode .VECTOR_I32X4_DOT_I16X8_S -> i32x4_dot_i16x8_s (x , y );
481
481
case Bytecode .VECTOR_I32X4_EXTMUL_LOW_I16X8_S -> extmul (x , y , I16X8 , VectorOperators .S2I , 0 );
482
482
case Bytecode .VECTOR_I32X4_EXTMUL_LOW_I16X8_U -> extmul (x , y , I16X8 , VectorOperators .ZERO_EXTEND_S2I , 0 );
@@ -537,7 +537,7 @@ public int vectorToInt(ByteVector xVec, int vectorOpcode) {
537
537
case Bytecode .VECTOR_I16X8_BITMASK -> bitmask (x , I16X8 );
538
538
case Bytecode .VECTOR_I32X4_ALL_TRUE -> all_true (x , I32X4 );
539
539
case Bytecode .VECTOR_I32X4_BITMASK -> bitmask (x , I32X4 );
540
- case Bytecode .VECTOR_I64X2_ALL_TRUE -> all_true ( x , I64X2 );
540
+ case Bytecode .VECTOR_I64X2_ALL_TRUE -> fallbackOps . vectorToInt ( x . toArray (), vectorOpcode ); // GR-68893
541
541
case Bytecode .VECTOR_I64X2_BITMASK -> bitmask (x , I64X2 );
542
542
default -> throw CompilerDirectives .shouldNotReachHere ();
543
543
};
@@ -747,6 +747,13 @@ private static <E> ByteVector unop(ByteVector xBytes, Shape<E> shape, VectorOper
747
747
return result .reinterpretAsBytes ();
748
748
}
749
749
750
+ private static ByteVector i8x16_popcnt (ByteVector x ) {
751
+ // Based on the same approach as Integer#bitCount
752
+ ByteVector popcnt = x .sub (x .lanewise (VectorOperators .LSHR , 1 ).and ((byte ) 0x55 ));
753
+ popcnt = popcnt .and ((byte ) 0x33 ).add (popcnt .lanewise (VectorOperators .LSHR , 2 ).and ((byte ) 0x33 ));
754
+ return popcnt .add (popcnt .lanewise (VectorOperators .LSHR , 4 )).and ((byte ) 0x0F );
755
+ }
756
+
750
757
private static <E , F > ByteVector extadd_pairwise (ByteVector xBytes , Shape <E > shape , VectorOperators .Conversion <E , F > conv ) {
751
758
Vector <E > x = shape .reinterpret (xBytes );
752
759
Vector <F > evens = x .compress (shape .evensMask ).convert (conv , 0 );
@@ -889,6 +896,7 @@ private static ByteVector i32x4_trunc_sat_f32x4_u(ByteVector xBytes) {
889
896
return result .reinterpretAsBytes ();
890
897
}
891
898
899
+ @ SuppressWarnings ("unused" )
892
900
private static ByteVector f32x4_convert_i32x4_u (ByteVector xBytes ) {
893
901
IntVector x = xBytes .reinterpretAsInts ();
894
902
LongVector xUnsignedLow = castLong128 (x .convert (VectorOperators .ZERO_EXTEND_I2L , 0 ));
@@ -915,6 +923,7 @@ private static ByteVector f64x2_convert_low_i32x4_u(ByteVector xBytes) {
915
923
return result .reinterpretAsBytes ();
916
924
}
917
925
926
+ @ SuppressWarnings ("unused" )
918
927
private static ByteVector f32X4_demote_f64X2_zero (ByteVector xBytes ) {
919
928
DoubleVector x = F64X2 .reinterpret (xBytes );
920
929
Vector <Float > result = compactGeneral (x , 0 , I64X2 , F32X4 , VectorOperators .D2F , VectorOperators .REINTERPRET_F2I , VectorOperators .ZERO_EXTEND_I2L );
@@ -1018,7 +1027,7 @@ private static <E, F> ByteVector narrow(ByteVector xBytes, ByteVector yBytes, Sh
1018
1027
return result .reinterpretAsBytes ();
1019
1028
}
1020
1029
1021
- private static <E , F > ByteVector binop_sat_u (ByteVector xBytes , ByteVector yBytes ,
1030
+ private static <E , F > ByteVector binop_sat (ByteVector xBytes , ByteVector yBytes ,
1022
1031
Shape <E > shape , Shape <F > extendedShape ,
1023
1032
VectorOperators .Conversion <E , F > upcast ,
1024
1033
VectorOperators .Binary op , long min , long max ) {
@@ -1033,8 +1042,7 @@ private static <E, F> ByteVector avgr_u(ByteVector xBytes, ByteVector yBytes,
1033
1042
Shape <E > shape , Shape <F > extendedShape ,
1034
1043
VectorOperators .Conversion <E , F > upcast ) {
1035
1044
Vector <F > one = extendedShape .broadcast (1 );
1036
- Vector <F > two = extendedShape .broadcast (2 );
1037
- return upcastBinopDowncast (xBytes , yBytes , shape , extendedShape , upcast , (x , y ) -> x .add (y ).add (one ).div (two ));
1045
+ return upcastBinopDowncast (xBytes , yBytes , shape , extendedShape , upcast , (x , y ) -> x .add (y ).add (one ).lanewise (VectorOperators .LSHR , 1 ));
1038
1046
}
1039
1047
1040
1048
private static ByteVector i16x8_q15mulr_sat_s (ByteVector xBytes , ByteVector yBytes ) {
0 commit comments