Skip to content

Commit 677e1d0

Browse files
authored
[CIR] Upstream gather instrinsics (#169157)
1 parent be5db33 commit 677e1d0

File tree

3 files changed

+483
-1
lines changed

3 files changed

+483
-1
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 91 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,10 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
487487
case X86::BI__builtin_ia32_compressqi128_mask:
488488
case X86::BI__builtin_ia32_compressqi256_mask:
489489
case X86::BI__builtin_ia32_compressqi512_mask:
490+
cgm.errorNYI(expr->getSourceRange(),
491+
std::string("unimplemented X86 builtin call: ") +
492+
getContext().BuiltinInfo.getName(builtinID));
493+
return {};
490494
case X86::BI__builtin_ia32_gather3div2df:
491495
case X86::BI__builtin_ia32_gather3div2di:
492496
case X86::BI__builtin_ia32_gather3div4df:
@@ -510,7 +514,93 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
510514
case X86::BI__builtin_ia32_gathersiv8di:
511515
case X86::BI__builtin_ia32_gathersiv16si:
512516
case X86::BI__builtin_ia32_gatherdiv8di:
513-
case X86::BI__builtin_ia32_gatherdiv16si:
517+
case X86::BI__builtin_ia32_gatherdiv16si: {
518+
StringRef intrinsicName;
519+
switch (builtinID) {
520+
default:
521+
llvm_unreachable("Unexpected builtin");
522+
case X86::BI__builtin_ia32_gather3div2df:
523+
intrinsicName = "x86.avx512.mask.gather3div2.df";
524+
break;
525+
case X86::BI__builtin_ia32_gather3div2di:
526+
intrinsicName = "x86.avx512.mask.gather3div2.di";
527+
break;
528+
case X86::BI__builtin_ia32_gather3div4df:
529+
intrinsicName = "x86.avx512.mask.gather3div4.df";
530+
break;
531+
case X86::BI__builtin_ia32_gather3div4di:
532+
intrinsicName = "x86.avx512.mask.gather3div4.di";
533+
break;
534+
case X86::BI__builtin_ia32_gather3div4sf:
535+
intrinsicName = "x86.avx512.mask.gather3div4.sf";
536+
break;
537+
case X86::BI__builtin_ia32_gather3div4si:
538+
intrinsicName = "x86.avx512.mask.gather3div4.si";
539+
break;
540+
case X86::BI__builtin_ia32_gather3div8sf:
541+
intrinsicName = "x86.avx512.mask.gather3div8.sf";
542+
break;
543+
case X86::BI__builtin_ia32_gather3div8si:
544+
intrinsicName = "x86.avx512.mask.gather3div8.si";
545+
break;
546+
case X86::BI__builtin_ia32_gather3siv2df:
547+
intrinsicName = "x86.avx512.mask.gather3siv2.df";
548+
break;
549+
case X86::BI__builtin_ia32_gather3siv2di:
550+
intrinsicName = "x86.avx512.mask.gather3siv2.di";
551+
break;
552+
case X86::BI__builtin_ia32_gather3siv4df:
553+
intrinsicName = "x86.avx512.mask.gather3siv4.df";
554+
break;
555+
case X86::BI__builtin_ia32_gather3siv4di:
556+
intrinsicName = "x86.avx512.mask.gather3siv4.di";
557+
break;
558+
case X86::BI__builtin_ia32_gather3siv4sf:
559+
intrinsicName = "x86.avx512.mask.gather3siv4.sf";
560+
break;
561+
case X86::BI__builtin_ia32_gather3siv4si:
562+
intrinsicName = "x86.avx512.mask.gather3siv4.si";
563+
break;
564+
case X86::BI__builtin_ia32_gather3siv8sf:
565+
intrinsicName = "x86.avx512.mask.gather3siv8.sf";
566+
break;
567+
case X86::BI__builtin_ia32_gather3siv8si:
568+
intrinsicName = "x86.avx512.mask.gather3siv8.si";
569+
break;
570+
case X86::BI__builtin_ia32_gathersiv8df:
571+
intrinsicName = "x86.avx512.mask.gather.dpd.512";
572+
break;
573+
case X86::BI__builtin_ia32_gathersiv16sf:
574+
intrinsicName = "x86.avx512.mask.gather.dps.512";
575+
break;
576+
case X86::BI__builtin_ia32_gatherdiv8df:
577+
intrinsicName = "x86.avx512.mask.gather.qpd.512";
578+
break;
579+
case X86::BI__builtin_ia32_gatherdiv16sf:
580+
intrinsicName = "x86.avx512.mask.gather.qps.512";
581+
break;
582+
case X86::BI__builtin_ia32_gathersiv8di:
583+
intrinsicName = "x86.avx512.mask.gather.dpq.512";
584+
break;
585+
case X86::BI__builtin_ia32_gathersiv16si:
586+
intrinsicName = "x86.avx512.mask.gather.dpi.512";
587+
break;
588+
case X86::BI__builtin_ia32_gatherdiv8di:
589+
intrinsicName = "x86.avx512.mask.gather.qpq.512";
590+
break;
591+
case X86::BI__builtin_ia32_gatherdiv16si:
592+
intrinsicName = "x86.avx512.mask.gather.qpi.512";
593+
break;
594+
}
595+
596+
mlir::Location loc = getLoc(expr->getExprLoc());
597+
unsigned minElts =
598+
std::min(cast<cir::VectorType>(ops[0].getType()).getSize(),
599+
cast<cir::VectorType>(ops[2].getType()).getSize());
600+
ops[3] = getMaskVecValue(builder, loc, ops[3], minElts);
601+
return emitIntrinsicCallOp(builder, loc, intrinsicName.str(),
602+
convertType(expr->getType()), ops);
603+
}
514604
case X86::BI__builtin_ia32_scattersiv8df:
515605
case X86::BI__builtin_ia32_scattersiv16sf:
516606
case X86::BI__builtin_ia32_scatterdiv8df:

clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,3 +228,194 @@ __mmask16 test_kmov_w(__mmask16 A) {
228228
// OGCG: bitcast <16 x i1> {{.*}} to i16
229229
return __builtin_ia32_kmovw(A);
230230
}
231+
__m256 test_mm512_i64gather_ps(__m512i __index, void const *__addr) {
232+
// CIR-LABEL: test_mm512_i64gather_ps
233+
// CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.qps.512"
234+
235+
// LLVM-LABEL: test_mm512_i64gather_ps
236+
// LLVM: call <8 x float> @llvm.x86.avx512.mask.gather.qps.512
237+
238+
// OGCG-LABEL: test_mm512_i64gather_ps
239+
// OGCG: call <8 x float> @llvm.x86.avx512.mask.gather.qps.512
240+
return _mm512_i64gather_ps(__index, __addr, 2);
241+
}
242+
243+
__m256 test_mm512_mask_i64gather_ps(__m256 __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
244+
// CIR-LABEL: test_mm512_mask_i64gather_ps
245+
// CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.qps.512"
246+
247+
// LLVM-LABEL: test_mm512_mask_i64gather_ps
248+
// LLVM: call <8 x float> @llvm.x86.avx512.mask.gather.qps.512
249+
250+
// OGCG-LABEL: test_mm512_mask_i64gather_ps
251+
// OGCG: call <8 x float> @llvm.x86.avx512.mask.gather.qps.512
252+
return _mm512_mask_i64gather_ps(__v1_old, __mask, __index, __addr, 2);
253+
}
254+
255+
__m256i test_mm512_i64gather_epi32(__m512i __index, void const *__addr) {
256+
// CIR-LABEL: test_mm512_i64gather_epi32
257+
// CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.qpi.512"
258+
259+
// LLVM-LABEL: test_mm512_i64gather_epi32
260+
// LLVM: call <8 x i32> @llvm.x86.avx512.mask.gather.qpi.512
261+
262+
// OGCG-LABEL: test_mm512_i64gather_epi32
263+
// OGCG: call <8 x i32> @llvm.x86.avx512.mask.gather.qpi.512
264+
return _mm512_i64gather_epi32(__index, __addr, 2);
265+
}
266+
267+
__m256i test_mm512_mask_i64gather_epi32(__m256i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
268+
// CIR-LABEL: test_mm512_mask_i64gather_epi32
269+
// CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.qpi.512"
270+
271+
// LLVM-LABEL: test_mm512_mask_i64gather_epi32
272+
// LLVM: call <8 x i32> @llvm.x86.avx512.mask.gather.qpi.512
273+
274+
// OGCG-LABEL: test_mm512_mask_i64gather_epi32
275+
// OGCG: call <8 x i32> @llvm.x86.avx512.mask.gather.qpi.512
276+
return _mm512_mask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2);
277+
}
278+
279+
__m512d test_mm512_i64gather_pd(__m512i __index, void const *__addr) {
280+
// CIR-LABEL: test_mm512_i64gather_pd
281+
// CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.qpd.512
282+
283+
// LLVM-LABEL: test_mm512_i64gather_pd
284+
// LLVM: call <8 x double> @llvm.x86.avx512.mask.gather.qpd.512
285+
286+
// OGCG-LABEL: test_mm512_i64gather_pd
287+
// OGCG: call <8 x double> @llvm.x86.avx512.mask.gather.qpd.512
288+
return _mm512_i64gather_pd(__index, __addr, 2);
289+
}
290+
291+
__m512d test_mm512_mask_i64gather_pd(__m512d __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
292+
// CIR-LABEL: test_mm512_mask_i64gather_pd
293+
// CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.qpd.512
294+
295+
// LLVM-LABEL: test_mm512_mask_i64gather_pd
296+
// LLVM: call <8 x double> @llvm.x86.avx512.mask.gather.qpd.512
297+
298+
// OGCG-LABEL: test_mm512_mask_i64gather_pd
299+
// OGCG: call <8 x double> @llvm.x86.avx512.mask.gather.qpd.512
300+
return _mm512_mask_i64gather_pd(__v1_old, __mask, __index, __addr, 2);
301+
}
302+
303+
__m512i test_mm512_i64gather_epi64(__m512i __index, void const *__addr) {
304+
// CIR-LABEL: test_mm512_i64gather_epi64
305+
// CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.qpq.512
306+
307+
// LLVM-LABEL: test_mm512_i64gather_epi64
308+
// LLVM: call <8 x i64> @llvm.x86.avx512.mask.gather.qpq.512
309+
310+
// OGCG-LABEL: test_mm512_i64gather_epi64
311+
// OGCG: call <8 x i64> @llvm.x86.avx512.mask.gather.qpq.512
312+
return _mm512_i64gather_epi64(__index, __addr, 2);
313+
}
314+
315+
__m512i test_mm512_mask_i64gather_epi64(__m512i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
316+
// CIR-LABEL: test_mm512_mask_i64gather_epi64
317+
// CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.qpq.512
318+
319+
// LLVM-LABEL: test_mm512_mask_i64gather_epi64
320+
// LLVM: call <8 x i64> @llvm.x86.avx512.mask.gather.qpq.512
321+
322+
// OGCG-LABEL: test_mm512_mask_i64gather_epi64
323+
// OGCG: call <8 x i64> @llvm.x86.avx512.mask.gather.qpq.512
324+
return _mm512_mask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2);
325+
}
326+
327+
__m512 test_mm512_i32gather_ps(__m512i __index, void const *__addr) {
328+
// CIR-LABEL: test_mm512_i32gather_ps
329+
// CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.dps.512
330+
331+
// LLVM-LABEL: test_mm512_i32gather_ps
332+
// LLVM: call <16 x float> @llvm.x86.avx512.mask.gather.dps.512
333+
334+
// OGCG-LABEL: test_mm512_i32gather_ps
335+
// OGCG: call <16 x float> @llvm.x86.avx512.mask.gather.dps.512
336+
return _mm512_i32gather_ps(__index, __addr, 2);
337+
}
338+
339+
__m512 test_mm512_mask_i32gather_ps(__m512 v1_old, __mmask16 __mask, __m512i __index, void const *__addr) {
340+
// CIR-LABEL: test_mm512_mask_i32gather_ps
341+
// CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.dps.512
342+
343+
// LLVM-LABEL: test_mm512_mask_i32gather_ps
344+
// LLVM: call <16 x float> @llvm.x86.avx512.mask.gather.dps.512
345+
346+
// OGCG-LABEL: test_mm512_mask_i32gather_ps
347+
// OGCG: call <16 x float> @llvm.x86.avx512.mask.gather.dps.512
348+
return _mm512_mask_i32gather_ps(v1_old, __mask, __index, __addr, 2);
349+
}
350+
351+
__m512i test_mm512_i32gather_epi32(__m512i __index, void const *__addr) {
352+
// CIR-LABEL: test_mm512_i32gather_epi32
353+
// CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.dpi.512
354+
355+
// LLVM-LABEL: test_mm512_i32gather_epi32
356+
// LLVM: call <16 x i32> @llvm.x86.avx512.mask.gather.dpi.512
357+
358+
// OGCG-LABEL: test_mm512_i32gather_epi32
359+
// OGCG: call <16 x i32> @llvm.x86.avx512.mask.gather.dpi.512
360+
return _mm512_i32gather_epi32(__index, __addr, 2);
361+
}
362+
363+
__m512i test_mm512_mask_i32gather_epi32(__m512i __v1_old, __mmask16 __mask, __m512i __index, void const *__addr) {
364+
// CIR-LABEL: test_mm512_mask_i32gather_epi32
365+
// CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.dpi.512
366+
367+
// LLVM-LABEL: test_mm512_mask_i32gather_epi32
368+
// LLVM: call <16 x i32> @llvm.x86.avx512.mask.gather.dpi.512
369+
370+
// OGCG-LABEL: test_mm512_mask_i32gather_epi32
371+
// OGCG: call <16 x i32> @llvm.x86.avx512.mask.gather.dpi.512
372+
return _mm512_mask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2);
373+
}
374+
375+
__m512d test_mm512_i32gather_pd(__m256i __index, void const *__addr) {
376+
// CIR-LABEL: test_mm512_i32gather_pd
377+
// CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.dpd.512
378+
379+
// LLVM-LABEL: test_mm512_i32gather_pd
380+
// LLVM: call <8 x double> @llvm.x86.avx512.mask.gather.dpd.512
381+
382+
// OGCG-LABEL: test_mm512_i32gather_pd
383+
// OGCG: call <8 x double> @llvm.x86.avx512.mask.gather.dpd.512
384+
return _mm512_i32gather_pd(__index, __addr, 2);
385+
}
386+
387+
__m512d test_mm512_mask_i32gather_pd(__m512d __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
388+
// CIR-LABEL: test_mm512_mask_i32gather_pd
389+
// CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.dpd.512
390+
391+
// LLVM-LABEL: test_mm512_mask_i32gather_pd
392+
// LLVM: call <8 x double> @llvm.x86.avx512.mask.gather.dpd.512
393+
394+
// OGCG-LABEL: test_mm512_mask_i32gather_pd
395+
// OGCG: call <8 x double> @llvm.x86.avx512.mask.gather.dpd.512
396+
return _mm512_mask_i32gather_pd(__v1_old, __mask, __index, __addr, 2);
397+
}
398+
399+
__m512i test_mm512_i32gather_epi64(__m256i __index, void const *__addr) {
400+
// CIR-LABEL: test_mm512_i32gather_epi64
401+
// CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.dpq.512
402+
403+
// LLVM-LABEL: test_mm512_i32gather_epi64
404+
// LLVM: call <8 x i64> @llvm.x86.avx512.mask.gather.dpq.512
405+
406+
// OGCG-LABEL: test_mm512_i32gather_epi64
407+
// OGCG: call <8 x i64> @llvm.x86.avx512.mask.gather.dpq.512
408+
return _mm512_i32gather_epi64(__index, __addr, 2);
409+
}
410+
411+
__m512i test_mm512_mask_i32gather_epi64(__m512i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
412+
// CIR-LABEL: test_mm512_mask_i32gather_epi64
413+
// CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.dpq.512
414+
415+
// LLVM-LABEL: test_mm512_mask_i32gather_epi64
416+
// LLVM: call <8 x i64> @llvm.x86.avx512.mask.gather.dpq.512
417+
418+
// OGCG-LABEL: test_mm512_mask_i32gather_epi64
419+
// OGCG: call <8 x i64> @llvm.x86.avx512.mask.gather.dpq.512
420+
return _mm512_mask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2);
421+
}

0 commit comments

Comments
 (0)