Skip to content

Commit d49dd18

Browse files
Simd float concrete comparisons (#81892)
There's no reason for these to ever be calls, so they should be transparent instead of just @_alwaysEmitIntoClient. Also adds concrete versions of comparisons with scalars, and FileCheck tests to make sure these generate 1-2 instruction sequences in release on arm64 (x86_64 is a little trickier to test due to frame pointers, but if we get the right codegen on arm64, in practice we do well on x86_64 for these too). Also makes FileCheck patterns for repeating initializers a bit more robust.
1 parent f91b4b0 commit d49dd18

6 files changed

+225
-46
lines changed

stdlib/public/core/SIMDFloatConcreteOperations.swift.gyb

Lines changed: 59 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -80,53 +80,71 @@ extension SIMD${n} where Scalar == ${Scalar} {
8080
}
8181

8282
% end
83-
/// A vector mask with the result of a pointwise equality comparison.
84-
@_alwaysEmitIntoClient
85-
public static func .==(a: Self, b: Self) -> SIMDMask<MaskStorage> {
86-
SIMDMask<MaskStorage>(${MaskExt}(
87-
Builtin.fcmp_oeq_${Builtin}(a._storage._value, b._storage._value)
88-
))
89-
}
90-
91-
/// A vector mask with the result of a pointwise inequality comparison.
92-
@_alwaysEmitIntoClient
93-
public static func .!=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
94-
SIMDMask<MaskStorage>(${MaskExt}(
95-
Builtin.fcmp_une_${Builtin}(a._storage._value, b._storage._value)
96-
))
97-
}
98-
99-
/// A vector mask with the result of a pointwise less-than comparison.
100-
@_alwaysEmitIntoClient
101-
public static func .<(a: Self, b: Self) -> SIMDMask<MaskStorage> {
102-
SIMDMask<MaskStorage>(${MaskExt}(
103-
Builtin.fcmp_olt_${Builtin}(a._storage._value, b._storage._value)
104-
))
105-
}
106-
107-
/// A vector mask with the result of a pointwise less-than-or-equal-to comparison.
108-
@_alwaysEmitIntoClient
109-
public static func .<=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
83+
%{
84+
compares = [
85+
("==", "oeq", "equal to"),
86+
("!=", "une", "not equal to"),
87+
("<", "olt", "less than"),
88+
("<=", "ole", "less than or equal to"),
89+
(">=", "oge", "greater than or equal to"),
90+
(">", "ogt", "greater than")
91+
]
92+
}%
93+
% for (op, bi, description) in compares:
94+
/// Pointwise compare ${description}.
95+
///
96+
/// Each lane of the result is true if that lane of a is ${description} the
97+
/// corresponding lane of b, and false otherwise.
98+
///
99+
/// Equivalent to:
100+
/// ```
101+
/// var result = SIMDMask<MaskStorage>()
102+
/// for i in 0..<${n} {
103+
/// result[i] = (a[i] ${op} b[i])
104+
/// }
105+
/// ```
106+
@_alwaysEmitIntoClient @_transparent
107+
public static func .${op}(a: Self, b: Self) -> SIMDMask<MaskStorage> {
110108
SIMDMask<MaskStorage>(${MaskExt}(
111-
Builtin.fcmp_ole_${Builtin}(a._storage._value, b._storage._value)
109+
Builtin.fcmp_${bi}_${Builtin}(a._storage._value, b._storage._value)
112110
))
113111
}
114112

115-
/// A vector mask with the result of a pointwise greater-than comparison.
116-
@_alwaysEmitIntoClient
117-
public static func .>(a: Self, b: Self) -> SIMDMask<MaskStorage> {
118-
SIMDMask<MaskStorage>(${MaskExt}(
119-
Builtin.fcmp_ogt_${Builtin}(a._storage._value, b._storage._value)
120-
))
113+
/// Pointwise compare ${description}.
114+
///
115+
/// Each lane of the result is true if that lane of a is ${description} b,
116+
/// and false otherwise.
117+
///
118+
/// Equivalent to:
119+
/// ```
120+
/// var result = SIMDMask<MaskStorage>()
121+
/// for i in 0..<${n} {
122+
/// result[i] = (a[i] ${op} b)
123+
/// }
124+
/// ```
125+
@_alwaysEmitIntoClient @_transparent
126+
public static func .${op}(a: Self, b: Scalar) -> SIMDMask<MaskStorage> {
127+
a .${op} Self(repeating: b)
121128
}
122-
123-
/// A vector mask with the result of a pointwise greater-than-or-equal-to comparison.
124-
@_alwaysEmitIntoClient
125-
public static func .>=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
126-
SIMDMask<MaskStorage>(${MaskExt}(
127-
Builtin.fcmp_oge_${Builtin}(a._storage._value, b._storage._value)
128-
))
129+
130+
/// Pointwise compare ${description}.
131+
///
132+
/// Each lane of the result is true if a is ${description} the corresponding
133+
/// lane of b, and false otherwise.
134+
///
135+
/// Equivalent to:
136+
/// ```
137+
/// var result = SIMDMask<MaskStorage>()
138+
/// for i in 0..<${n} {
139+
/// result[i] = (a ${op} b[i])
140+
/// }
141+
/// ```
142+
@_alwaysEmitIntoClient @_transparent
143+
public static func .${op}(a: Scalar, b: Self) -> SIMDMask<MaskStorage> {
144+
Self(repeating: a) .${op} b
129145
}
146+
147+
% end
130148
}
131149
% if bits == 16:
132150
#endif
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
//===--- SIMDFloatComparisons.swift.gyb -----------------------*- swift -*-===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2025 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
// RUN: %empty-directory(%t)
13+
// RUN: %gyb %s -o %t/SIMDFloatComparisons.swift
14+
// RUN: %target-swift-frontend -primary-file %t/SIMDFloatComparisons.swift -S | %FileCheck %t/SIMDFloatComparisons.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKOnone-%target-cpu
15+
// RUN: %target-swift-frontend -primary-file %t/SIMDFloatComparisons.swift -S -O | %FileCheck %t/SIMDFloatComparisons.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKO-%target-cpu
16+
17+
import Swift
18+
19+
%for bits in [16,32,64]:
20+
% scalar = {16:'Float16',32:'Float',64:'Double'}[bits]
21+
% for totalBits in [64,128]:
22+
% n = totalBits // bits
23+
% if n != 1:
24+
% neonSuffix = str(n) + {8:'b',16:'h',32:'s',64:'d'}[bits]
25+
% if bits == 16:
26+
#if arch(arm64)
27+
@available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
28+
% end
29+
func compare_eq${n}x${bits}(
30+
_ a: SIMD${n}<${scalar}>, _ b: SIMD${n}<${scalar}>
31+
) -> SIMDMask<SIMD${n}<Int${bits}>> {
32+
a .== b
33+
}
34+
% if bits == 16:
35+
#endif
36+
// CHECK-arm64: compare_eq${n}x${bits}{{[[:alnum:]_]+}}:
37+
% else:
38+
// CHECK: compare_eq${n}x${bits}{{[[:alnum:]_]+}}:
39+
// CHECK-x86_64: cmpeqp${'s' if bits == 32 else 'd'}
40+
// CHECK-x86_64: ret
41+
% end
42+
// CHECKO-arm64-NEXT: fcmeq.${neonSuffix} v0, v0, v1
43+
// CHECKO-arm64-NEXT: ret
44+
// CHECKOnone-arm64: fcmeq.${neonSuffix}
45+
// CHECKOnone-arm64: ret
46+
47+
% if bits == 16:
48+
#if arch(arm64)
49+
@available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
50+
% end
51+
func compare_ne${n}x${bits}(
52+
_ a: SIMD${n}<${scalar}>, _ b: SIMD${n}<${scalar}>
53+
) -> SIMDMask<SIMD${n}<Int${bits}>> {
54+
a .!= b
55+
}
56+
% if bits == 16:
57+
#endif
58+
// CHECK-arm64: compare_ne${n}x${bits}{{[[:alnum:]_]+}}:
59+
% else:
60+
// CHECK: compare_ne${n}x${bits}{{[[:alnum:]_]+}}:
61+
// CHECK-x86_64: cmpneqp${'s' if bits == 32 else 'd'}
62+
// CHECK-x86_64: ret
63+
% end
64+
// CHECKO-arm64-NEXT: fcmeq.${neonSuffix} [[TMP:v[0-9]+]], v0, v1
65+
// CHECKO-arm64-NEXT: mvn.${totalBits//8}b v0, [[TMP]]
66+
// CHECKO-arm64-NEXT: ret
67+
// CHECKOnone-arm64: fcmeq.${neonSuffix}
68+
// CHECKOnone-arm64: mvn.${totalBits//8}b
69+
// CHECKOnone-arm64: ret
70+
71+
% if bits == 16:
72+
#if arch(arm64)
73+
@available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
74+
% end
75+
func compare_lt${n}x${bits}(
76+
_ a: SIMD${n}<${scalar}>, _ b: SIMD${n}<${scalar}>
77+
) -> SIMDMask<SIMD${n}<Int${bits}>> {
78+
a .< b
79+
}
80+
% if bits == 16:
81+
#endif
82+
// CHECK-arm64: compare_lt${n}x${bits}{{[[:alnum:]_]+}}:
83+
% else:
84+
// CHECK: compare_lt${n}x${bits}{{[[:alnum:]_]+}}:
85+
// CHECK-x86_64: cmpltp${'s' if bits == 32 else 'd'}
86+
// CHECK-x86_64: ret
87+
% end
88+
// CHECKO-arm64-NEXT: fcmgt.${neonSuffix} v0, v1, v0
89+
// CHECKO-arm64-NEXT: ret
90+
// CHECKOnone-arm64: fcmgt.${neonSuffix}
91+
// CHECKOnone-arm64: ret
92+
93+
% if bits == 16:
94+
#if arch(arm64)
95+
@available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
96+
% end
97+
func compare_le${n}x${bits}(
98+
_ a: SIMD${n}<${scalar}>, _ b: SIMD${n}<${scalar}>
99+
) -> SIMDMask<SIMD${n}<Int${bits}>> {
100+
a .<= b
101+
}
102+
% if bits == 16:
103+
#endif
104+
// CHECK-arm64: compare_le${n}x${bits}{{[[:alnum:]_]+}}:
105+
% else:
106+
// CHECK: compare_le${n}x${bits}{{[[:alnum:]_]+}}:
107+
// CHECK-x86_64: cmplep${'s' if bits == 32 else 'd'}
108+
// CHECK-x86_64: ret
109+
% end
110+
// CHECKO-arm64-NEXT: fcmge.${neonSuffix} v0, v1, v0
111+
// CHECKO-arm64-NEXT: ret
112+
// CHECKOnone-arm64: fcmge.${neonSuffix}
113+
// CHECKOnone-arm64: ret
114+
115+
% if bits == 16:
116+
#if arch(arm64)
117+
@available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
118+
% end
119+
func compare_ge${n}x${bits}(
120+
_ a: SIMD${n}<${scalar}>, _ b: SIMD${n}<${scalar}>
121+
) -> SIMDMask<SIMD${n}<Int${bits}>> {
122+
a .>= b
123+
}
124+
% if bits == 16:
125+
#endif
126+
// CHECK-arm64: compare_ge${n}x${bits}{{[[:alnum:]_]+}}:
127+
% else:
128+
// CHECK: compare_ge${n}x${bits}{{[[:alnum:]_]+}}:
129+
// CHECK-x86_64: cmplep${'s' if bits == 32 else 'd'}
130+
// CHECK-x86_64: ret
131+
% end
132+
// CHECKO-arm64-NEXT: fcmge.${neonSuffix} v0, v0, v1
133+
// CHECKO-arm64-NEXT: ret
134+
// CHECKOnone-arm64: fcmge.${neonSuffix}
135+
// CHECKOnone-arm64: ret
136+
137+
% if bits == 16:
138+
#if arch(arm64)
139+
@available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
140+
% end
141+
func compare_gt${n}x${bits}(
142+
_ a: SIMD${n}<${scalar}>, _ b: SIMD${n}<${scalar}>
143+
) -> SIMDMask<SIMD${n}<Int${bits}>> {
144+
a .> b
145+
}
146+
% if bits == 16:
147+
#endif
148+
// CHECK-arm64: compare_gt${n}x${bits}{{[[:alnum:]_]+}}:
149+
% else:
150+
// CHECK: compare_gt${n}x${bits}{{[[:alnum:]_]+}}:
151+
// CHECK-x86_64: cmpltp${'s' if bits == 32 else 'd'}
152+
// CHECK-x86_64: ret
153+
% end
154+
// CHECKO-arm64-NEXT: fcmgt.${neonSuffix} v0, v0, v1
155+
// CHECKO-arm64-NEXT: ret
156+
// CHECKOnone-arm64: fcmgt.${neonSuffix}
157+
// CHECKOnone-arm64: ret
158+
159+
% end
160+
% end
161+
%end

test/stdlib/SIMDFloatInitializers.swift.gyb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@ func repeating${n}x${bits}(_ scalar: ${scalar}) -> SIMD${n}<${scalar}> {
3131
}
3232
% if bits == 16:
3333
#endif
34-
// CHECK-arm64: repeating${n}x${bits}{{[[:alnum:]]+}}:
34+
// CHECK-arm64: repeating${n}x${bits}{{[[:alnum:]_]+}}:
3535
% else:
36-
// CHECK: repeating${n}x${bits}{{[[:alnum:]]+}}:
36+
// CHECK: repeating${n}x${bits}{{[[:alnum:]_]+}}:
3737
% end
3838
// CHECKO-arm64-NEXT: dup.${neonSuffix} v0, v0[0]
3939
// CHECKO-arm64-NEXT: ret

test/stdlib/SIMDMaskInitializers.swift.gyb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ import Swift
2424
func repeating${n}_mask${bits}(_ scalar: Bool) -> SIMDMask<SIMD${n}<Int${bits}>> {
2525
SIMDMask(repeating: scalar)
2626
}
27-
// CHECK: repeating${n}_mask${bits}{{[[:alnum:]]+}}:
27+
// CHECK: repeating${n}_mask${bits}{{[[:alnum:]_]+}}:
2828
// CHECKO-arm64-NEXT: sbfx [[REG:[wx][0-9]]], {{[wx]}}0, #0, #1
2929
// CHECKO-arm64-NEXT: dup.${neonSuffix} v0, [[REG]]
3030
// CHECKO-arm64-NEXT: ret

test/stdlib/SIMDSignedInitializers.swift.gyb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ import Swift
2525
func repeating${n}_int${bits}(_ scalar: Int${bits}) -> SIMD${n}<Int${bits}> {
2626
SIMD${n}(repeating: scalar)
2727
}
28-
// CHECK: repeating${n}_int${bits}{{[[:alnum:]]+}}:
28+
// CHECK: repeating${n}_int${bits}{{[[:alnum:]_]+}}:
2929
// CHECKO-arm64-NEXT: dup.${neonSuffix} v0, {{[wx]}}0
3030
// CHECKO-arm64-NEXT: ret
3131
// CHECKOnone-arm64: dup.${neonSuffix}

test/stdlib/SIMDUnsignedInitializers.swift.gyb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ import Swift
2525
func repeating${n}_uint${bits}(_ scalar: UInt${bits}) -> SIMD${n}<UInt${bits}> {
2626
SIMD${n}(repeating: scalar)
2727
}
28-
// CHECK: repeating${n}_uint${bits}{{[[:alnum:]]+}}:
28+
// CHECK: repeating${n}_uint${bits}{{[[:alnum:]_]+}}:
2929
// CHECKO-arm64-NEXT: dup.${neonSuffix} v0, {{[wx]}}0
3030
// CHECKO-arm64-NEXT: ret
3131
// CHECKOnone-arm64: dup.${neonSuffix}

0 commit comments

Comments
 (0)