
Commit 30b30fd

[clang] Fix sub-integer __builtin_elementwise_(add|sub)_sat
These builtins would unconditionally perform the usual arithmetic conversions on promotable scalar integer arguments. In practice this meant that char and short arguments were promoted to int and the result was truncated back down afterwards, silently replacing a saturating add/sub with a regular add/sub, which is neither intuitive nor intended behaviour.

With this patch, promotable scalar integer types are no longer promoted to int but are kept intact. If the two types differ, the smaller integer is promoted to the larger one, and the signedness of the operation matches the larger integer type.

No change is made to vector types, which are not promoted and whose element types must already match.
1 parent e0a79ee commit 30b30fd
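
As a rough illustration of the behavioural change (not part of the commit; assumes an 8-bit signed char target, so SCHAR_MAX is 127):

#include <stdio.h>

int main(void) {
  signed char a = 100, b = 100;
  signed char r = __builtin_elementwise_add_sat(a, b);
  // Before this patch: a and b were promoted to int, the builtin computed the
  // plain int value 200, and the assignment truncated it back to char
  // (typically -56), so no saturation actually happened.
  // With this patch: the operation stays at 8 bits and saturates, so r == 127.
  printf("%d\n", (int)r);
  return 0;
}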

2 files changed (+134, -2 lines)

clang/lib/Sema/SemaChecking.cpp

Lines changed: 38 additions & 0 deletions
@@ -2765,6 +2765,44 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
   // types only.
   case Builtin::BI__builtin_elementwise_add_sat:
   case Builtin::BI__builtin_elementwise_sub_sat: {
+    if (checkArgCount(TheCall, 2))
+      return ExprError();
+    ExprResult LHS = TheCall->getArg(0);
+    ExprResult RHS = TheCall->getArg(1);
+    QualType LHSType = LHS.get()->getType().getUnqualifiedType();
+    QualType RHSType = RHS.get()->getType().getUnqualifiedType();
+    // If both LHS/RHS are promotable integer types, do not perform the usual
+    // conversions - we must keep the saturating operation at the correct
+    // bitwidth.
+    if (Context.isPromotableIntegerType(LHSType) &&
+        Context.isPromotableIntegerType(RHSType)) {
+      // First, convert each argument to an r-value.
+      ExprResult ResLHS = DefaultFunctionArrayLvalueConversion(LHS.get());
+      if (ResLHS.isInvalid())
+        return ExprError();
+      LHS = ResLHS.get();
+
+      ExprResult ResRHS = DefaultFunctionArrayLvalueConversion(RHS.get());
+      if (ResRHS.isInvalid())
+        return ExprError();
+      RHS = ResRHS.get();
+
+      LHSType = LHS.get()->getType().getUnqualifiedType();
+      RHSType = RHS.get()->getType().getUnqualifiedType();
+
+      // If the two integer types are not of equal order, cast the smaller
+      // integer one to the larger one
+      if (int Order = Context.getIntegerTypeOrder(LHSType, RHSType); Order == 1)
+        RHS = ImpCastExprToType(RHS.get(), LHSType, CK_IntegralCast);
+      else if (Order == -1)
+        LHS = ImpCastExprToType(LHS.get(), RHSType, CK_IntegralCast);
+
+      TheCall->setArg(0, LHS.get());
+      TheCall->setArg(1, RHS.get());
+      TheCall->setType(LHS.get()->getType().getUnqualifiedType());
+      break;
+    }
+
     if (BuiltinElementwiseMath(TheCall))
       return ExprError();
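
The conversion rule implemented above can be sketched in user code (a hypothetical example, not from the patch; assumes 8-bit char and 16-bit short, matching the CodeGen test below): when the operand widths differ, the narrower operand is extended to the wider type and the saturating operation takes the width and signedness of that wider type.

// Hypothetical illustration; the intrinsics named in the comments are the
// ones the test file below checks for.
short mixed_add_sat(signed char c, short s, unsigned short us) {
  // c is sign-extended to short; 16-bit signed saturating add
  // (lowered to llvm.sadd.sat.i16).
  short a = __builtin_elementwise_add_sat(c, s);
  // c is converted to unsigned short; 16-bit unsigned saturating add
  // (lowered to llvm.uadd.sat.i16).
  unsigned short b = __builtin_elementwise_add_sat(us, c);
  return (short)(a + b);
}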

clang/test/CodeGen/builtins-elementwise-math.c

Lines changed: 96 additions & 2 deletions
@@ -68,7 +68,10 @@ void test_builtin_elementwise_add_sat(float f1, float f2, double d1, double d2,
                                        long long int i2, si8 vi1, si8 vi2,
                                        unsigned u1, unsigned u2, u4 vu1, u4 vu2,
                                        _BitInt(31) bi1, _BitInt(31) bi2,
-                                       unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
+                                       unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2,
+                                       char c1, char c2, unsigned char uc1,
+                                       unsigned char uc2, short s1, short s2,
+                                       unsigned short us1, unsigned short us2) {
   // CHECK: [[I1:%.+]] = load i64, ptr %i1.addr, align 8
   // CHECK-NEXT: [[I2:%.+]] = load i64, ptr %i2.addr, align 8
   // CHECK-NEXT: call i64 @llvm.sadd.sat.i64(i64 [[I1]], i64 [[I2]])
@@ -114,14 +117,61 @@ void test_builtin_elementwise_add_sat(float f1, float f2, double d1, double d2,

   // CHECK: store i64 98, ptr %i1.addr, align 8
   i1 = __builtin_elementwise_add_sat(1, 'a');
+
+  // CHECK: [[C1:%.+]] = load i8, ptr %c1.addr, align 1
+  // CHECK-NEXT: [[C2:%.+]] = load i8, ptr %c2.addr, align 1
+  // CHECK-NEXT: call i8 @llvm.sadd.sat.i8(i8 [[C1]], i8 [[C2]])
+  c1 = __builtin_elementwise_add_sat(c1, c2);
+
+  // CHECK: [[UC1:%.+]] = load i8, ptr %uc1.addr, align 1
+  // CHECK-NEXT: [[UC2:%.+]] = load i8, ptr %uc2.addr, align 1
+  // CHECK-NEXT: call i8 @llvm.uadd.sat.i8(i8 [[UC1]], i8 [[UC2]])
+  uc1 = __builtin_elementwise_add_sat(uc1, uc2);
+
+  // CHECK: [[S1:%.+]] = load i16, ptr %s1.addr, align 2
+  // CHECK-NEXT: [[S2:%.+]] = load i16, ptr %s2.addr, align 2
+  // CHECK-NEXT: call i16 @llvm.sadd.sat.i16(i16 [[S1]], i16 [[S2]])
+  s1 = __builtin_elementwise_add_sat(s1, s2);
+
+  // CHECK: [[US1:%.+]] = load i16, ptr %us1.addr, align 2
+  // CHECK-NEXT: [[US2:%.+]] = load i16, ptr %us2.addr, align 2
+  // CHECK-NEXT: call i16 @llvm.uadd.sat.i16(i16 [[US1]], i16 [[US2]])
+  us1 = __builtin_elementwise_add_sat(us1, us2);
+
+  // CHECK: [[C1:%.+]] = load i8, ptr %c1.addr, align 1
+  // CHECK-NEXT: [[C1EXT:%.+]] = sext i8 [[C1]] to i16
+  // CHECK-NEXT: [[S1:%.+]] = load i16, ptr %s1.addr, align 2
+  // CHECK-NEXT: call i16 @llvm.sadd.sat.i16(i16 [[C1EXT]], i16 [[S1]])
+  s1 = __builtin_elementwise_add_sat(c1, s1);
+
+  // CHECK: [[S1:%.+]] = load i16, ptr %s1.addr, align 2
+  // CHECK-NEXT: [[C1:%.+]] = load i8, ptr %c1.addr, align 1
+  // CHECK-NEXT: [[C1EXT:%.+]] = sext i8 [[C1]] to i16
+  // CHECK-NEXT: call i16 @llvm.sadd.sat.i16(i16 [[S1]], i16 [[C1EXT]])
+  s1 = __builtin_elementwise_add_sat(s1, c1);
+
+  // CHECK: [[UC1:%.+]] = load i8, ptr %uc1.addr, align 1
+  // CHECK-NEXT: [[UC1EXT:%.+]] = zext i8 [[UC1]] to i16
+  // CHECK-NEXT: [[S1:%.+]] = load i16, ptr %s1.addr, align 2
+  // CHECK-NEXT: call i16 @llvm.sadd.sat.i16(i16 [[UC1EXT]], i16 [[S1]])
+  s1 = __builtin_elementwise_add_sat(uc1, s1);
+
+  // CHECK: [[US1:%.+]] = load i16, ptr %us1.addr, align 2
+  // CHECK-NEXT: [[C1:%.+]] = load i8, ptr %c1.addr, align 1
+  // CHECK-NEXT: [[C1EXT:%.+]] = sext i8 [[C1]] to i16
+  // CHECK-NEXT: call i16 @llvm.uadd.sat.i16(i16 [[US1]], i16 [[C1EXT]])
+  us1 = __builtin_elementwise_add_sat(us1, c1);
 }

 void test_builtin_elementwise_sub_sat(float f1, float f2, double d1, double d2,
                                        float4 vf1, float4 vf2, long long int i1,
                                        long long int i2, si8 vi1, si8 vi2,
                                        unsigned u1, unsigned u2, u4 vu1, u4 vu2,
                                        _BitInt(31) bi1, _BitInt(31) bi2,
-                                       unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
+                                       unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2,
+                                       char c1, char c2, unsigned char uc1,
+                                       unsigned char uc2, short s1, short s2,
+                                       unsigned short us1, unsigned short us2) {
   // CHECK: [[I1:%.+]] = load i64, ptr %i1.addr, align 8
   // CHECK-NEXT: [[I2:%.+]] = load i64, ptr %i2.addr, align 8
   // CHECK-NEXT: call i64 @llvm.ssub.sat.i64(i64 [[I1]], i64 [[I2]])
@@ -167,6 +217,50 @@ void test_builtin_elementwise_sub_sat(float f1, float f2, double d1, double d2,

   // CHECK: store i64 -96, ptr %i1.addr, align 8
   i1 = __builtin_elementwise_sub_sat(1, 'a');
+
+  // CHECK: [[C1:%.+]] = load i8, ptr %c1.addr, align 1
+  // CHECK-NEXT: [[C2:%.+]] = load i8, ptr %c2.addr, align 1
+  // CHECK-NEXT: call i8 @llvm.ssub.sat.i8(i8 [[C1]], i8 [[C2]])
+  c1 = __builtin_elementwise_sub_sat(c1, c2);
+
+  // CHECK: [[UC1:%.+]] = load i8, ptr %uc1.addr, align 1
+  // CHECK-NEXT: [[UC2:%.+]] = load i8, ptr %uc2.addr, align 1
+  // CHECK-NEXT: call i8 @llvm.usub.sat.i8(i8 [[UC1]], i8 [[UC2]])
+  uc1 = __builtin_elementwise_sub_sat(uc1, uc2);
+
+  // CHECK: [[S1:%.+]] = load i16, ptr %s1.addr, align 2
+  // CHECK-NEXT: [[S2:%.+]] = load i16, ptr %s2.addr, align 2
+  // CHECK-NEXT: call i16 @llvm.ssub.sat.i16(i16 [[S1]], i16 [[S2]])
+  s1 = __builtin_elementwise_sub_sat(s1, s2);
+
+  // CHECK: [[US1:%.+]] = load i16, ptr %us1.addr, align 2
+  // CHECK-NEXT: [[US2:%.+]] = load i16, ptr %us2.addr, align 2
+  // CHECK-NEXT: call i16 @llvm.usub.sat.i16(i16 [[US1]], i16 [[US2]])
+  us1 = __builtin_elementwise_sub_sat(us1, us2);
+
+  // CHECK: [[C1:%.+]] = load i8, ptr %c1.addr, align 1
+  // CHECK-NEXT: [[C1EXT:%.+]] = sext i8 [[C1]] to i16
+  // CHECK-NEXT: [[S1:%.+]] = load i16, ptr %s1.addr, align 2
+  // CHECK-NEXT: call i16 @llvm.ssub.sat.i16(i16 [[C1EXT]], i16 [[S1]])
+  s1 = __builtin_elementwise_sub_sat(c1, s1);
+
+  // CHECK: [[S1:%.+]] = load i16, ptr %s1.addr, align 2
+  // CHECK-NEXT: [[C1:%.+]] = load i8, ptr %c1.addr, align 1
+  // CHECK-NEXT: [[C1EXT:%.+]] = sext i8 [[C1]] to i16
+  // CHECK-NEXT: call i16 @llvm.ssub.sat.i16(i16 [[S1]], i16 [[C1EXT]])
+  s1 = __builtin_elementwise_sub_sat(s1, c1);
+
+  // CHECK: [[UC1:%.+]] = load i8, ptr %uc1.addr, align 1
+  // CHECK-NEXT: [[UC1EXT:%.+]] = zext i8 [[UC1]] to i16
+  // CHECK-NEXT: [[S1:%.+]] = load i16, ptr %s1.addr, align 2
+  // CHECK-NEXT: call i16 @llvm.ssub.sat.i16(i16 [[UC1EXT]], i16 [[S1]])
+  s1 = __builtin_elementwise_sub_sat(uc1, s1);
+
+  // CHECK: [[US1:%.+]] = load i16, ptr %us1.addr, align 2
+  // CHECK-NEXT: [[C1:%.+]] = load i8, ptr %c1.addr, align 1
+  // CHECK-NEXT: [[C1EXT:%.+]] = sext i8 [[C1]] to i16
+  // CHECK-NEXT: call i16 @llvm.usub.sat.i16(i16 [[US1]], i16 [[C1EXT]])
+  us1 = __builtin_elementwise_sub_sat(us1, c1);
 }

 void test_builtin_elementwise_maximum(float f1, float f2, double d1, double d2,
