jasmin-lang · bgregoir · Dec 12, 2022 · Nov 30, 2022 · Dec 2, 2022
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -21,6 +21,17 @@
   ([PR #290](https://github.com/jasmin-lang/jasmin/pull/290)).
   These get extracted to `|<<|` and `|>>|` in EasyCrypt.
 
+  - x86 intrinsics that accept a size suffix (e.g., `_128`) also accept, with a
+    warning, a vector suffix (e.g., `_4u32`)
+    ([PR #303](https://github.com/jasmin-lang/jasmin/pull/303)).
+
+## Bug fixes
+
+ - The x86 instructions `VMOVSHDUP` and `VMOVSLDUP` accept a size suffix (`_128`
+   or `_256`) instead of a vector description suffix (`4u32` or `8u32`)
+  ([PR #303](https://github.com/jasmin-lang/jasmin/pull/303);
+  fixes [#301](https://github.com/jasmin-lang/jasmin/issues/301)).
+
 ## Other changes
 
 - Explicit if-then-else in flag combinations is no longer supported

diff --git a/compiler/src/pretyping.ml b/compiler/src/pretyping.ml
@@ -1385,7 +1385,11 @@ let tt_prim asmOp ws id args =
     pr ws (match sz with
         | SAw sz -> sz 
         | SA -> d 
-        | SAv _ | SAvv _ -> rs_tyerror ~loc (PrimNotVector s)
+        | SAv (s, ve, sz) ->
+           warning SimplifyVectorSuffix (L.i_loc0 loc) "vector suffix simplified from %s to %a"
+             (PrintCommon.string_of_velem s sz ve) PrintCommon.pp_wsize sz;
+           sz
+        | SAvv _ -> rs_tyerror ~loc (PrimNotVector s)
         | SAx _ -> rs_tyerror ~loc (PrimNotX s))
   | PrimM pr -> if sz = SA then pr ws else rs_tyerror ~loc (PrimNoSize s)
   | PrimV pr -> (match sz with SAv (s, ve, sz) -> pr ws s ve sz | _ -> rs_tyerror ~loc (PrimIsVector s))

diff --git a/compiler/src/printCommon.mli b/compiler/src/printCommon.mli
@@ -1,6 +1,7 @@
 val pp_string0 : Format.formatter -> char list -> unit
 val pp_wsize : Format.formatter -> Wsize.wsize -> unit
 val string_of_signess : Wsize.signedness -> string
+val string_of_velem : Wsize.signedness -> Wsize.wsize -> Wsize.velem -> string
 val string_of_op1 : Expr.sop1 -> string
 val string_of_op2 : Expr.sop2 -> string
 val pp_opn : 'asm Sopn.asmOp -> Format.formatter -> 'asm Sopn.sopn -> unit

diff --git a/compiler/src/utils.ml b/compiler/src/utils.ml
@@ -401,6 +401,7 @@ type warning =
   | UseLea
   | IntroduceNone 
   | IntroduceArrayCopy
+  | SimplifyVectorSuffix
   | Always
 
 let warns = ref None

diff --git a/compiler/src/utils.mli b/compiler/src/utils.mli
@@ -179,7 +179,8 @@ type warning =
   | ExtraAssignment 
   | UseLea
   | IntroduceNone
-  | IntroduceArrayCopy 
+  | IntroduceArrayCopy
+  | SimplifyVectorSuffix
   | Always
 
 val nowarning : unit -> unit

diff --git a/compiler/tests/success/vmovsxdup.jazz b/compiler/tests/success/vmovsxdup.jazz
@@ -3,20 +3,20 @@ fn test(reg u64 p) {
   reg u128 a b;
   reg u256 x y;
 
-  a = #VMOVSLDUP_4u32((u128)[p]);
-  b = #VMOVSLDUP_4u32(a);
+  a = #VMOVSLDUP_128((u128)[p]);
+  b = #VMOVSLDUP_128(a);
   (u128)[p] = b;
 
-  a = #VMOVSHDUP_4u32((u128)[p]);
-  b = #VMOVSHDUP_4u32(a);
+  a = #VMOVSHDUP_128((u128)[p]);
+  b = #VMOVSHDUP_128(a);
   (u128)[p] = b;
 
-  x = #VMOVSLDUP_8u32((u256)[p]);
-  y = #VMOVSLDUP_8u32(x);
+  x = #VMOVSLDUP_256((u256)[p]);
+  y = #VMOVSLDUP_256(x);
   (u128)[p] = y;
 
-  x = #VMOVSHDUP_8u32((u256)[p]);
-  y = #VMOVSHDUP_8u32(x);
+  x = #VMOVSHDUP_256((u256)[p]);
+  y = #VMOVSHDUP_256(x);
   (u128)[p] = y;
 
 }
diff --git a/eclib/JModel.ec b/eclib/JModel.ec
@@ -670,21 +670,21 @@ op VSHUFPS_256 (w1 : W256.t) (w2 : W256.t) (m : W8.t) : W256.t =
 
 (* ------------------------------------------------------------------- *)
 (*
-| VMOVSHDUP of velem & wsize (* Replicate 32-bit (“single”) high values *)
-| VMOVSLDUP of velem & wsize (* Replicate 32-bit (“single”) low values *)
+| VMOVSHDUP of wsize (* Replicate 32-bit (“single”) high values *)
+| VMOVSLDUP of wsize (* Replicate 32-bit (“single”) low values *)
 *)
 
-op VMOVSLDUP_4u32 (v: W128.t): W128.t =
+op VMOVSLDUP_128 (v: W128.t): W128.t =
   pack4 [v \bits32 0; v \bits32 0; v \bits32  2; v \bits32 2].
 
-op VMOVSLDUP_8u32 (v: W256.t): W256.t =
-  map VMOVSLDUP_4u32 v.
+op VMOVSLDUP_256 (v: W256.t): W256.t =
+  map VMOVSLDUP_128 v.
 
-op VMOVSHDUP_4u32 (v: W128.t): W128.t =
+op VMOVSHDUP_128 (v: W128.t): W128.t =
   pack4 [v \bits32 1; v \bits32 1; v \bits32  3; v \bits32 3].
 
-op VMOVSHDUP_8u32 (v: W256.t): W256.t =
-  map VMOVSHDUP_4u32 v.
+op VMOVSHDUP_256 (v: W256.t): W256.t =
+  map VMOVSHDUP_128 v.
 
 (* ------------------------------------------------------------------- *)
 (*

diff --git a/proofs/compiler/x86_instr_decl.v b/proofs/compiler/x86_instr_decl.v
@@ -121,8 +121,8 @@ Variant x86_op : Type :=
 | VPACKSS  `(velem) `(wsize)
 | VSHUFPS  `(wsize)
 | VPBROADCAST of velem & wsize
-| VMOVSHDUP of velem & wsize (* Replicate 32-bit (“single”) high values *)
-| VMOVSLDUP of velem & wsize (* Replicate 32-bit (“single”) low values *)
+| VMOVSHDUP of wsize (* Replicate 32-bit (“single”) high values *)
+| VMOVSLDUP of wsize (* Replicate 32-bit (“single”) low values *)
 | VPALIGNR  `(wsize)
 | VBROADCASTI128
 | VPUNPCKH `(velem) `(wsize)
@@ -829,15 +829,13 @@ Definition x86_VPBROADCAST ve sz (v: word ve) : ex_tpl (w_ty sz) :=
   ok (wpbroadcast sz v).
 
 (* ---------------------------------------------------------------- *)
-Definition x86_VMOVSHDUP ve sz (v: word sz) : ex_tpl (w_ty sz) :=
+Definition x86_VMOVSHDUP sz (v: word sz) : ex_tpl (w_ty sz) :=
   Let _ := check_size_128_256 sz in
-  Let _ := assert (ve == VE32) ErrType in
-  ok (wdup_hi ve v).
+  ok (wdup_hi VE32 v).
 
-Definition x86_VMOVSLDUP ve sz (v: word sz) : ex_tpl (w_ty sz) :=
+Definition x86_VMOVSLDUP sz (v: word sz) : ex_tpl (w_ty sz) :=
   Let _ := check_size_128_256 sz in
-  Let _ := assert (ve == VE32) ErrType in
-  ok (wdup_lo ve v).
+  ok (wdup_lo VE32 v).
 
 (* ---------------------------------------------------------------- *)
 Definition x86_VEXTRACTI128 (v: u256) (i: u8) : ex_tpl (w_ty U128) :=
@@ -1508,10 +1506,10 @@ Definition Ox86_VPBROADCAST_instr       :=
   mk_ve_instr_w_w_10 "VPBROADCAST" x86_VPBROADCAST check_xmm_xmmm (PrimV VPBROADCAST) pp_vpbroadcast.
 
 Definition Ox86_VMOVSHDUP_instr :=
-  mk_ve_instr_w_w_10 "VMOVSHDUP" x86_VMOVSHDUP check_xmm_xmmm (PrimV VMOVSHDUP) (λ _, pp_name "vmovshdup").
+  mk_instr_w_w "VMOVSHDUP" x86_VMOVSHDUP [:: E 1 ] [:: E 0 ] 2 check_xmm_xmmm (PrimP U256 VMOVSHDUP) (pp_name "vmovshdup").
 
 Definition Ox86_VMOVSLDUP_instr :=
-  mk_ve_instr_w_w_10 "VMOVSLDUP" x86_VMOVSLDUP check_xmm_xmmm (PrimV VMOVSLDUP) (λ _, pp_name "vmovsldup").
+  mk_instr_w_w "VMOVSLDUP" x86_VMOVSLDUP [:: E 1 ] [:: E 0 ] 2 check_xmm_xmmm (PrimP U256 VMOVSLDUP) (pp_name "vmovsldup").
 
 Definition Ox86_VPALIGNR_instr := 
   ((fun sz =>
@@ -1834,8 +1832,8 @@ Definition x86_instr_desc o : instr_desc_t :=
   | VPACKUS ve sz      => Ox86_VPACKUS_instr.1 ve sz
   | VPACKSS ve sz      => Ox86_VPACKSS_instr.1 ve sz
   | VPBROADCAST sz sz' => Ox86_VPBROADCAST_instr.1 sz sz'
-  | VMOVSHDUP sz sz'   => Ox86_VMOVSHDUP_instr.1 sz sz'
-  | VMOVSLDUP sz sz'   => Ox86_VMOVSLDUP_instr.1 sz sz'
+  | VMOVSHDUP sz       => Ox86_VMOVSHDUP_instr.1 sz
+  | VMOVSLDUP sz       => Ox86_VMOVSLDUP_instr.1 sz
   | VPALIGNR sz        => Ox86_VPALIGNR_instr.1 sz 
   | VBROADCASTI128     => Ox86_VBROADCASTI128_instr.1
   | VPERM2I128         => Ox86_VPERM2I128_instr.1