Skip to content
Merged
Show file tree
Hide file tree
Changes from 31 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
29e2794
add tests that should pass: clang/test/SemaCXX/constexpr-x86-intrinsi…
HamzaHassanain Nov 26, 2025
30c0dc7
added __DEFAULT_FN_ATTRS_CONSTEXPR To Headers
HamzaHassanain Nov 26, 2025
9f1020e
added Constexpr to necessary builtins
HamzaHassanain Nov 26, 2025
d28d6d8
added FULL tests for pd2ps constexpr
HamzaHassanain Nov 29, 2025
4a2f59b
fully implemented features in ExprConstant visiting logic
HamzaHassanain Nov 29, 2025
0fb3292
Ran the git clang-format command
HamzaHassanain Nov 29, 2025
a39b5d7
Merge branch 'main' into constexpr-cvtpd2ps
HamzaHassanain Nov 29, 2025
beb8139
Merge branch 'main' into constexpr-cvtpd2ps
HamzaHassanain Nov 29, 2025
2cdbb1f
Merge branch 'constexpr-cvtpd2ps' of https://github.com/HamzaHassanai…
HamzaHassanain Nov 29, 2025
f8afdda
Merge branch 'main' into constexpr-cvtpd2ps
HamzaHassanain Nov 29, 2025
75c7671
removed constexpr from _mm512_undefined_ps
HamzaHassanain Dec 1, 2025
04dabc0
added constexpr to __builtin_ia32_undef, and updated BuiltinsX86.td
HamzaHassanain Dec 1, 2025
880b060
Removed undef as constexpr and used _mm256_setzero_ps instead
HamzaHassanain Dec 1, 2025
4efe60a
Implemented InterpBuiltin cpp implementations
HamzaHassanain Dec 1, 2025
d5084f7
styled The tests
HamzaHassanain Dec 1, 2025
db4e844
Merge branch 'main' of https://github.com/llvm/llvm-project into cons…
HamzaHassanain Dec 1, 2025
51d213d
ran the format commands
HamzaHassanain Dec 1, 2025
154eea1
Fixed the formatting!
HamzaHassanain Dec 1, 2025
c5ecb01
Formatted avx512f header using the projects current clang-format
HamzaHassanain Dec 1, 2025
28a823a
Update clang/lib/AST/ByteCode/InterpBuiltin.cpp
HamzaHassanain Dec 1, 2025
b9b71bd
Did the Requested Changes
HamzaHassanain Dec 1, 2025
21ab33c
Formatted The InterpBuiltin
HamzaHassanain Dec 1, 2025
4957b30
fixed a naming conflict
HamzaHassanain Dec 2, 2025
8b786f0
added assertion on getElementType() and getASTContext().FloatTy
HamzaHassanain Dec 2, 2025
2bab71e
Ran The formatter Again
HamzaHassanain Dec 2, 2025
5964bed
resolved merge conflict
HamzaHassanain Dec 2, 2025
cc1dada
Did the requested changes
HamzaHassanain Dec 2, 2025
b2b68d9
Fix loop condition in element copy operation
HamzaHassanain Dec 2, 2025
c430491
Fixed The Whitespace
HamzaHassanain Dec 2, 2025
deeb958
Merge branch 'constexpr-cvtpd2ps' of https://github.com/HamzaHassanai…
HamzaHassanain Dec 2, 2025
88488aa
Did the required changes
HamzaHassanain Dec 2, 2025
a08303d
Did the HasRoundingMask change
HamzaHassanain Dec 2, 2025
2484594
Merge branch 'main' into constexpr-cvtpd2ps
RKSimon Dec 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 20 additions & 7 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -166,14 +166,20 @@ let Features = "sse2", Attributes = [NoThrow] in {
def movnti : X86Builtin<"void(int *, int)">;
}

// SSE2 double -> float conversion builtins. This group carries the Constexpr
// attribute in addition to NoThrow/Const, with a required vector width of 128.
//   cvtpd2ps: <2 x double> -> <4 x float>
//   cvtsd2ss: (<4 x float>, <2 x double>) -> <4 x float>
let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cvtpd2ps : X86Builtin<"_Vector<4, float>(_Vector<2, double>)">;
def cvtsd2ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>)">;
}
// AVX-512F masked/rounding variant of cvtsd2ss, also marked Constexpr.
// Operands in order: <4 x float>, <2 x double>, <4 x float>, an unsigned char
// mask, and a constant int (the builtin name suggests a rounding control).
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cvtsd2ss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>, _Vector<4, float>, unsigned char, _Constant int)">;
}

// SSE2 128-bit builtins declared NoThrow/Const; this group does not carry the
// Constexpr attribute.
let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">;
def cvtpd2dq : X86Builtin<"_Vector<2, long long int>(_Vector<2, double>)">;
def cvtpd2ps : X86Builtin<"_Vector<4, float>(_Vector<2, double>)">;
def cvttpd2dq : X86Builtin<"_Vector<4, int>(_Vector<2, double>)">;
def cvtsd2si : X86Builtin<"int(_Vector<2, double>)">;
def cvttsd2si : X86Builtin<"int(_Vector<2, double>)">;
def cvtsd2ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>)">;
def cvtps2dq : X86Builtin<"_Vector<4, int>(_Vector<4, float>)">;
def cvttps2dq : X86Builtin<"_Vector<4, int>(_Vector<4, float>)">;
}
Expand Down Expand Up @@ -462,19 +468,21 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
}

// AVX <4 x double> -> <4 x float> conversion builtin, marked Constexpr
// (required vector width 256).
let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def cvtpd2ps256 : X86Builtin<"_Vector<4, float>(_Vector<4, double>)">;
}

let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant char)">;
def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
def cvtpd2ps256 : X86Builtin<"_Vector<4, float>(_Vector<4, double>)">;
def cvtps2dq256 : X86Builtin<"_Vector<8, int>(_Vector<8, float>)">;
def cvttpd2dq256 : X86Builtin<"_Vector<4, int>(_Vector<4, double>)">;
def cvtpd2dq256 : X86Builtin<"_Vector<4, int>(_Vector<4, double>)">;
def cvttps2dq256 : X86Builtin<"_Vector<8, int>(_Vector<8, float>)">;
def vperm2f128_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
def vperm2f128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
def vperm2f128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;

foreach Op = ["max", "min"] in {
def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">;
def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">;
Expand Down Expand Up @@ -1004,6 +1012,10 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128
def cmppd128_mask : X86Builtin<"unsigned char(_Vector<2, double>, _Vector<2, double>, _Constant int, unsigned char)">;
}

// AVX-512F masked <8 x double> -> <8 x float> conversion builtin, marked
// Constexpr. Operands in order: source vector, <8 x float> vector, an
// unsigned char mask, and a constant int.
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def cvtpd2ps512_mask : X86Builtin<"_Vector<8, float>(_Vector<8, double>, _Vector<8, float>, unsigned char, _Constant int)">;
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
def rndscaleps_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int, _Vector<16, float>, unsigned short, _Constant int)">;
def rndscalepd_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int, _Vector<8, double>, unsigned char, _Constant int)">;
Expand All @@ -1017,7 +1029,6 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>
def maxpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">;
def cvtdq2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, int>, _Vector<16, float>, unsigned short, _Constant int)">;
def cvtudq2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, int>, _Vector<16, float>, unsigned short, _Constant int)">;
def cvtpd2ps512_mask : X86Builtin<"_Vector<8, float>(_Vector<8, double>, _Vector<8, float>, unsigned char, _Constant int)">;
def vcvtps2ph512_mask : X86Builtin<"_Vector<16, short>(_Vector<16, float>, _Constant int, _Vector<16, short>, unsigned short)">;
def vcvtph2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, short>, _Vector<16, float>, unsigned short, _Constant int)">;
}
Expand Down Expand Up @@ -1452,9 +1463,12 @@ let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
def compressstoresi256_mask : X86Builtin<"void(_Vector<8, int *>, _Vector<8, int>, unsigned char)">;
}

// AVX-512VL masked <2 x double> -> <4 x float> conversion builtin, marked
// Constexpr. Operands in order: source vector, <4 x float> vector, and an
// unsigned char mask.
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cvtpd2ps_mask : X86Builtin<"_Vector<4, float>(_Vector<2, double>, _Vector<4, float>, unsigned char)">;
}

// AVX-512VL 128-bit masked conversions from <2 x double>; this group is
// NoThrow/Const and does not carry the Constexpr attribute.
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def cvtpd2dq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">;
def cvtpd2ps_mask : X86Builtin<"_Vector<4, float>(_Vector<2, double>, _Vector<4, float>, unsigned char)">;
def cvtpd2udq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">;
}

Expand Down Expand Up @@ -3288,7 +3302,6 @@ let Features = "avx512bw,avx512vl",
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def cvtsd2ss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>, _Vector<4, float>, unsigned char, _Constant int)">;
def cvtsi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, int, _Constant int)">;
def cvtss2sd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<4, float>, _Vector<2, double>, unsigned char, _Constant int)">;
def cvtusi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, unsigned int, _Constant int)">;
Expand Down
162 changes: 162 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,38 @@ static llvm::APSInt convertBoolVectorToInt(const Pointer &Val) {
return Result;
}

// Exact-only narrowing of a floating-point value to IEEE single precision,
// shared by the X86 cvtpd2ps / cvtsd2ss builtin evaluators.
//
// The conversion is refused (returning false, with a note emitted through
// S.CCEDiag on DiagExpr when S is diagnosing) if:
//   * Src is an infinity or a NaN,
//   * converting with round-to-nearest-even loses information,
//   * the converted value is denormal, or
//   * APFloat::convert reports any status other than opOK.
// On success the converted value is copied into Dst and true is returned.
static bool convertDoubleToFloatStrict(APFloat Src, Floating &Dst,
                                       InterpState &S, const Expr *DiagExpr) {
  // Infinity and NaN share one note; the streamed index (0 or 1) picks the
  // wording.
  if (Src.isInfinity() || Src.isNaN()) {
    if (S.diagnosing())
      S.CCEDiag(DiagExpr, diag::note_constexpr_float_arithmetic)
          << (Src.isNaN() ? 1 : 0);
    return false;
  }

  // Emits the given note (if diagnosing) and yields the failure result.
  auto Reject = [&S, DiagExpr](unsigned NoteID) {
    if (S.diagnosing())
      S.CCEDiag(DiagExpr, NoteID);
    return false;
  };

  bool Inexact = false;
  APFloat Narrowed = Src;
  const APFloat::opStatus ConvStatus = Narrowed.convert(
      APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &Inexact);

  // Lossy or denormal results are checked first so they get the "strict"
  // note; any other non-OK status falls through to the generic one.
  if (Inexact || Narrowed.isDenormal())
    return Reject(diag::note_constexpr_float_arithmetic_strict);
  if (ConvStatus != APFloat::opOK)
    return Reject(diag::note_invalid_subexpr_in_const_expr);

  Dst.copy(Narrowed);
  return true;
}

static bool interp__builtin_is_constant_evaluated(InterpState &S, CodePtr OpPC,
const InterpFrame *Frame,
const CallExpr *Call) {
Expand Down Expand Up @@ -3359,6 +3391,122 @@ static bool interp__builtin_ia32_cvt_vec2mask(InterpState &S, CodePtr OpPC,
pushInteger(S, RetMask, Call->getType());
return true;
}
// Evaluates the cvtsd2ss family of builtins.
//
// Without a mask the call has two operands (A, B). With a mask it has five
// (A, B, Src, mask, rounding). The result takes lanes 1..N-1 from A. Lane 0
// is element 0 of B converted to float, unless a mask is present and its low
// bit is clear, in which case lane 0 is taken from Src.
//
// The rounding operand is popped but not consulted, and HasRounding is not
// read; the conversion helper only accepts exact conversions.
// Returns false if an operand fails CheckLoad or the conversion is rejected.
static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC,
                                          const CallExpr *Call, bool HasMask,
                                          bool HasRounding) {
  Pointer A, B, Src;
  APSInt MaskInt;

  // Operands are popped last-to-first.
  if (!HasMask) {
    assert(Call->getNumArgs() == 2);
    B = S.Stk.pop<Pointer>();
    A = S.Stk.pop<Pointer>();
    if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B))
      return false;
  } else {
    assert(Call->getNumArgs() == 5);
    APSInt Rounding = popToAPSInt(S, Call->getArg(4));
    (void)Rounding; // Must be popped to keep the stack balanced.
    MaskInt = popToAPSInt(S, Call->getArg(3));
    Src = S.Stk.pop<Pointer>();
    B = S.Stk.pop<Pointer>();
    A = S.Stk.pop<Pointer>();
    if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B) ||
        !CheckLoad(S, OpPC, Src))
      return false;
  }

  const auto *ResultVTy = Call->getType()->castAs<VectorType>();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  // Upper lanes pass straight through from A; lane 0 is filled in below.
  for (unsigned Lane = 1, End = ResultVTy->getNumElements(); Lane != End;
       ++Lane)
    Dst.elem<Floating>(Lane) = A.elem<Floating>(Lane);

  const bool LaneMaskedOff = HasMask && !(MaskInt.getZExtValue() & 0x1);
  if (LaneMaskedOff) {
    Dst.elem<Floating>(0) = Src.elem<Floating>(0);
  } else {
    assert(S.getASTContext().FloatTy == ResultVTy->getElementType() &&
           "cvtsd2ss requires float element type in destination vector");

    Floating Narrowed = S.allocFloat(
        S.getASTContext().getFloatTypeSemantics(ResultVTy->getElementType()));
    if (!convertDoubleToFloatStrict(B.elem<Floating>(0).getAPFloat(), Narrowed,
                                    S, Call))
      return false;
    Dst.elem<Floating>(0) = Narrowed;
  }

  Dst.initializeAllElements();
  return true;
}

// Evaluates the packed double -> float conversion builtins (cvtpd2ps and its
// masked / masked-with-rounding variants).
//
// Operand layout, in call order:
//   unmasked:            (Src)
//   masked:              (Src, PassThrough, Mask)
//   masked + rounding:   (Src, PassThrough, Mask, Rounding)
//
// For every source lane I: if the call is unmasked or mask bit I is set, the
// result lane is Src[I] converted to float; otherwise it is PassThrough[I].
// Result lanes with no corresponding source lane (the result vector can be
// wider than the source, e.g. 2 doubles -> 4 floats) are always zero. This
// includes the masked form: the instruction zeroes the upper part of the
// destination and does not merge it from the pass-through operand.
//
// The rounding operand is popped to keep the stack balanced but is otherwise
// unused; the conversion helper only accepts exact conversions.
// Returns false if an operand fails CheckLoad or a conversion is rejected.
static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
                                          const CallExpr *Call, bool IsMasked,
                                          bool HasRounding) {
  APSInt MaskVal;
  APSInt Rounding;
  Pointer PassThrough;
  Pointer Src;

  // Operands are popped in reverse call order.
  if (IsMasked) {
    if (HasRounding)
      Rounding = popToAPSInt(S, Call->getArg(3));
    MaskVal = popToAPSInt(S, Call->getArg(2));
    PassThrough = S.Stk.pop<Pointer>();
    Src = S.Stk.pop<Pointer>();

    if (!CheckLoad(S, OpPC, PassThrough))
      return false;
  } else {
    Src = S.Stk.pop<Pointer>();
  }

  if (!CheckLoad(S, OpPC, Src))
    return false;

  const auto *RetVTy = Call->getType()->castAs<VectorType>();
  unsigned RetElems = RetVTy->getNumElements();
  unsigned SrcElems = Src.getNumElems();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  assert(S.getASTContext().FloatTy == RetVTy->getElementType() &&
         "cvtpd2ps requires float element type in return vector");
  assert(SrcElems <= RetElems &&
         "cvtpd2ps source cannot have more lanes than the result");

  // Seed the result. Pass-through only applies to lanes that have a source
  // element; lanes beyond the source width are zeroed in every variant.
  for (unsigned I = 0; I != RetElems; ++I) {
    if (IsMasked && I < SrcElems)
      Dst.elem<Floating>(I) = PassThrough.elem<Floating>(I);
    else
      Dst.elem<Floating>(I) = Floating(APFloat(0.0f));
  }

  // Convert each enabled source lane, overwriting the seeded value.
  for (unsigned I = 0; I != SrcElems; ++I) {
    if (IsMasked && !MaskVal[I])
      continue;

    APFloat SrcVal = Src.elem<Floating>(I).getAPFloat();

    Floating Conv = S.allocFloat(
        S.getASTContext().getFloatTypeSemantics(RetVTy->getElementType()));
    if (!convertDoubleToFloatStrict(SrcVal, Conv, S, Call))
      return false;
    Dst.elem<Floating>(I) = Conv;
  }

  Dst.initializeAllElements();
  return true;
}

static bool interp__builtin_ia32_shuffle_generic(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
Expand Down Expand Up @@ -5169,6 +5317,20 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_cvtq2mask512:
return interp__builtin_ia32_cvt_vec2mask(S, OpPC, Call, BuiltinID);

case X86::BI__builtin_ia32_cvtsd2ss:
return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, false, false);

case X86::BI__builtin_ia32_cvtsd2ss_round_mask:
return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, true, true);

case X86::BI__builtin_ia32_cvtpd2ps:
case X86::BI__builtin_ia32_cvtpd2ps256:
return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, false, false);
case X86::BI__builtin_ia32_cvtpd2ps_mask:
return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, true, false);
case X86::BI__builtin_ia32_cvtpd2ps512_mask:
return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, true, true);

case X86::BI__builtin_ia32_cmpb128_mask:
case X86::BI__builtin_ia32_cmpw128_mask:
case X86::BI__builtin_ia32_cmpd128_mask:
Expand Down
Loading