Skip to content
This repository was archived by the owner on Aug 31, 2019. It is now read-only.

Commit 561ba72

Browse files
committed
[X86] Use __builtin_ia32_vec_ext_v4hi and __builtin_ia32_vec_set_v4hi to implement pextrw/pinsrw MMX intrinsics instead of trying to use native IR.
Without this we end up generating code that doesn't use MMX registers and probably doesn't work well with other MMX intrinsics.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@274968 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent beb54c9 commit 561ba72

File tree

3 files changed

+18
-13
lines changed

3 files changed

+18
-13
lines changed

include/clang/Basic/BuiltinsX86.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,8 @@ TARGET_BUILTIN(__builtin_ia32_pmovmskb, "iV8c", "", "sse")
161161
TARGET_BUILTIN(__builtin_ia32_pmulhuw, "V4sV4sV4s", "", "sse")
162162
TARGET_BUILTIN(__builtin_ia32_psadbw, "V4sV8cV8c", "", "sse")
163163
TARGET_BUILTIN(__builtin_ia32_pshufw, "V4sV4sIc", "", "sse")
164+
TARGET_BUILTIN(__builtin_ia32_vec_ext_v4hi, "iV4sIi", "", "sse")
165+
TARGET_BUILTIN(__builtin_ia32_vec_set_v4hi, "V4sV4siIi", "", "sse")
164166

165167
// MMX+SSE2
166168
TARGET_BUILTIN(__builtin_ia32_cvtpd2pi, "V2iV2d", "", "sse2")

lib/Headers/xmmintrin.h

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2114,12 +2114,8 @@ _mm_sfence(void)
21142114
/// 2: Bits [47:32] are copied to the destination.
21152115
/// 3: Bits [63:48] are copied to the destination.
21162116
/// \returns A 16-bit integer containing the extracted 16 bits of packed data.
2117-
static __inline__ int __DEFAULT_FN_ATTRS
2118-
_mm_extract_pi16(__m64 __a, int __n)
2119-
{
2120-
__v4hi __b = (__v4hi)__a;
2121-
return (unsigned short)__b[__n & 3];
2122-
}
2117+
#define _mm_extract_pi16(a, n) __extension__ ({ \
2118+
(int)__builtin_ia32_vec_ext_v4hi((__m64)a, (int)n); })
21232119

21242120
/// \brief Copies data from the 64-bit vector of [4 x i16] to the destination,
21252121
/// and inserts the lower 16-bits of an integer operand at the 16-bit offset
@@ -2145,13 +2141,8 @@ _mm_extract_pi16(__m64 __a, int __n)
21452141
/// bits in operand __a.
21462142
/// \returns A 64-bit integer vector containing the copied packed data from the
21472143
/// operands.
2148-
static __inline__ __m64 __DEFAULT_FN_ATTRS
2149-
_mm_insert_pi16(__m64 __a, int __d, int __n)
2150-
{
2151-
__v4hi __b = (__v4hi)__a;
2152-
__b[__n & 3] = __d;
2153-
return (__m64)__b;
2154-
}
2144+
#define _mm_insert_pi16(a, d, n) __extension__ ({ \
2145+
(__m64)__builtin_ia32_vec_set_v4hi((__m64)a, (int)d, (int)n); })
21552146

21562147
/// \brief Compares each of the corresponding packed 16-bit integer values of
21572148
/// the 64-bit integer vectors, and writes the greater value to the

test/CodeGen/mmx-builtins.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,12 @@ __m64 test_mm_cvttps_pi32(__m128 a) {
217217
return _mm_cvttps_pi32(a);
218218
}
219219

220+
int test_mm_extract_pi16(__m64 a) {
221+
// CHECK-LABEL: test_mm_extract_pi16
222+
// CHECK: call i32 @llvm.x86.mmx.pextr.w
223+
return _mm_extract_pi16(a, 2);
224+
}
225+
220226
__m64 test_m_from_int(int a) {
221227
// CHECK-LABEL: test_m_from_int
222228
// CHECK: insertelement <2 x i32>
@@ -265,6 +271,12 @@ __m64 test_mm_hsubs_pi16(__m64 a, __m64 b) {
265271
return _mm_hsubs_pi16(a, b);
266272
}
267273

274+
__m64 test_mm_insert_pi16(__m64 a, int d) {
275+
// CHECK-LABEL: test_mm_insert_pi16
276+
// CHECK: call x86_mmx @llvm.x86.mmx.pinsr.w
277+
return _mm_insert_pi16(a, d, 2);
278+
}
279+
268280
__m64 test_mm_madd_pi16(__m64 a, __m64 b) {
269281
// CHECK-LABEL: test_mm_madd_pi16
270282
// CHECK: call x86_mmx @llvm.x86.mmx.pmadd.wd

0 commit comments

Comments
 (0)