@@ -35,8 +35,8 @@ namespace torchao {
35
35
// On CUDA earlier than 12.5, the ordered_metadata version of this instruction
36
36
// is not supported. On later versions of CUDA the version without ordered
37
37
// metadata results in the following warning:
38
- // | Advisory: Modifier ‘. sp::ordered_metadata’ should be used on instruction
39
- // | ‘ mma’ instead of modifier ‘.sp’ as it is expected to have substantially
38
+ // | Advisory: Modifier '.sp::ordered_metadata' should be used on instruction
39
+ // | 'mma' instead of modifier '.sp' as it is expected to have substantially
40
40
// | reduced performance on some future architectures
41
41
42
42
#if defined(USE_ROCM)
@@ -281,15 +281,15 @@ __device__ inline void scale_floats(float* c0, float* c1, float* c2, float* c3,
281
281
float * c7, FragS& s1) {
282
282
#ifdef USE_ROCM
283
283
// AMD implementation - fixed
284
- *c0 = __builtin_amdgcn_fmul_f32 (*c0, __half2float (s0[0 ].x ));
285
- *c1 = __builtin_amdgcn_fmul_f32 (*c1, __half2float (s0[0 ].y ));
286
- *c2 = __builtin_amdgcn_fmul_f32 (*c2, __half2float (s0[1 ].x ));
287
- *c3 = __builtin_amdgcn_fmul_f32 (*c3, __half2float (s0[1 ].y ));
284
+ *c0 = __ocml_fmul_f32 (*c0, __half2float (s0[0 ].x ));
285
+ *c1 = __ocml_fmul_f32 (*c1, __half2float (s0[0 ].y ));
286
+ *c2 = __ocml_fmul_f32 (*c2, __half2float (s0[1 ].x ));
287
+ *c3 = __ocml_fmul_f32 (*c3, __half2float (s0[1 ].y ));
288
288
289
- *c4 = __builtin_amdgcn_fmul_f32 (*c4, __half2float (s1[0 ].x ));
290
- *c5 = __builtin_amdgcn_fmul_f32 (*c5, __half2float (s1[0 ].y ));
291
- *c6 = __builtin_amdgcn_fmul_f32 (*c6, __half2float (s1[1 ].x ));
292
- *c7 = __builtin_amdgcn_fmul_f32 (*c7, __half2float (s1[1 ].y ));
289
+ *c4 = __ocml_fmul_f32 (*c4, __half2float (s1[0 ].x ));
290
+ *c5 = __ocml_fmul_f32 (*c5, __half2float (s1[0 ].y ));
291
+ *c6 = __ocml_fmul_f32 (*c6, __half2float (s1[1 ].x ));
292
+ *c7 = __ocml_fmul_f32 (*c7, __half2float (s1[1 ].y ));
293
293
#else
294
294
// NVIDIA implementation
295
295
*c0 = __fmul_rn (*c0, __half2float (s0[0 ].x ));
0 commit comments