Skip to content

Commit bb9820a

Browse files
uytvbnbwrsandman
authored and committed
[Linux] Force passing XMM values by pointer
1 parent 999ec74 commit bb9820a

File tree

2 files changed

+20
-20
lines changed

2 files changed

+20
-20
lines changed

src/xenia/cpu/backend/x64/x64_seq_vector.cc

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -693,7 +693,7 @@ EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SUB, VECTOR_SUB);
693693
// OPCODE_VECTOR_SHL
694694
// ============================================================================
695695
template <typename T, std::enable_if_t<std::is_integral<T>::value, int> = 0>
696-
static __m128i EmulateVectorShl(void*, __m128i src1, __m128i src2) {
696+
static __m128i EmulateVectorShl(void*, __m128i& src1, __m128i& src2) {
697697
alignas(16) T value[16 / sizeof(T)];
698698
alignas(16) T shamt[16 / sizeof(T)];
699699

@@ -882,7 +882,7 @@ EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SHL, VECTOR_SHL_V128);
882882
// OPCODE_VECTOR_SHR
883883
// ============================================================================
884884
template <typename T, std::enable_if_t<std::is_integral<T>::value, int> = 0>
885-
static __m128i EmulateVectorShr(void*, __m128i src1, __m128i src2) {
885+
static __m128i EmulateVectorShr(void*, __m128i& src1, __m128i& src2) {
886886
alignas(16) T value[16 / sizeof(T)];
887887
alignas(16) T shamt[16 / sizeof(T)];
888888

@@ -1212,7 +1212,7 @@ EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SHA, VECTOR_SHA_V128);
12121212
// OPCODE_VECTOR_ROTATE_LEFT
12131213
// ============================================================================
12141214
template <typename T, std::enable_if_t<std::is_integral<T>::value, int> = 0>
1215-
static __m128i EmulateVectorRotateLeft(void*, __m128i src1, __m128i src2) {
1215+
static __m128i EmulateVectorRotateLeft(void*, __m128i& src1, __m128i& src2) {
12161216
alignas(16) T value[16 / sizeof(T)];
12171217
alignas(16) T shamt[16 / sizeof(T)];
12181218

@@ -1305,7 +1305,7 @@ EMITTER_OPCODE_TABLE(OPCODE_VECTOR_ROTATE_LEFT, VECTOR_ROTATE_LEFT_V128);
13051305
// OPCODE_VECTOR_AVERAGE
13061306
// ============================================================================
13071307
template <typename T, std::enable_if_t<std::is_integral<T>::value, int> = 0>
1308-
static __m128i EmulateVectorAverage(void*, __m128i src1, __m128i src2) {
1308+
static __m128i EmulateVectorAverage(void*, __m128i& src1, __m128i& src2) {
13091309
alignas(16) T src1v[16 / sizeof(T)];
13101310
alignas(16) T src2v[16 / sizeof(T)];
13111311
alignas(16) T value[16 / sizeof(T)];
@@ -1873,7 +1873,7 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
18731873
// ((src1.uy & 0xFF) << 8) | (src1.uz & 0xFF)
18741874
e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackD3DCOLOR));
18751875
}
1876-
static __m128i EmulateFLOAT16_2(void*, __m128 src1) {
1876+
static __m128i EmulateFLOAT16_2(void*, __m128& src1) {
18771877
alignas(16) float a[4];
18781878
alignas(16) uint16_t b[8];
18791879
_mm_store_ps(a, src1);
@@ -1912,7 +1912,7 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
19121912
e.vmovaps(i.dest, e.xmm0);
19131913
}
19141914
}
1915-
static __m128i EmulateFLOAT16_4(void*, __m128 src1) {
1915+
static __m128i EmulateFLOAT16_4(void*, __m128& src1) {
19161916
alignas(16) float a[4];
19171917
alignas(16) uint16_t b[8];
19181918
_mm_store_ps(a, src1);
@@ -2043,8 +2043,8 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
20432043
// Merge XZ and YW.
20442044
e.vorps(i.dest, e.xmm0);
20452045
}
2046-
static __m128i EmulatePack8_IN_16_UN_UN_SAT(void*, __m128i src1,
2047-
__m128i src2) {
2046+
static __m128i EmulatePack8_IN_16_UN_UN_SAT(void*, __m128i& src1,
2047+
__m128i& src2) {
20482048
alignas(16) uint16_t a[8];
20492049
alignas(16) uint16_t b[8];
20502050
alignas(16) uint8_t c[16];
@@ -2056,7 +2056,7 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
20562056
}
20572057
return _mm_load_si128(reinterpret_cast<__m128i*>(c));
20582058
}
2059-
static __m128i EmulatePack8_IN_16_UN_UN(void*, __m128i src1, __m128i src2) {
2059+
static __m128i EmulatePack8_IN_16_UN_UN(void*, __m128i& src1, __m128i& src2) {
20602060
alignas(16) uint8_t a[16];
20612061
alignas(16) uint8_t b[16];
20622062
alignas(16) uint8_t c[16];
@@ -2289,7 +2289,7 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
22892289
e.vpor(i.dest, e.GetXmmConstPtr(XMMOne));
22902290
// To convert to 0 to 1, games multiply by 0x47008081 and add 0xC7008081.
22912291
}
2292-
static __m128 EmulateFLOAT16_2(void*, __m128i src1) {
2292+
static __m128 EmulateFLOAT16_2(void*, __m128i& src1) {
22932293
alignas(16) uint16_t a[8];
22942294
alignas(16) float b[4];
22952295
_mm_store_si128(reinterpret_cast<__m128i*>(a), src1);
@@ -2346,7 +2346,7 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
23462346
e.vmovaps(i.dest, e.xmm0);
23472347
}
23482348
}
2349-
static __m128 EmulateFLOAT16_4(void*, __m128i src1) {
2349+
static __m128 EmulateFLOAT16_4(void*, __m128i& src1) {
23502350
alignas(16) uint16_t a[8];
23512351
alignas(16) float b[4];
23522352
_mm_store_si128(reinterpret_cast<__m128i*>(a), src1);
@@ -2624,4 +2624,4 @@ EMITTER_OPCODE_TABLE(OPCODE_UNPACK, UNPACK);
26242624
} // namespace x64
26252625
} // namespace backend
26262626
} // namespace cpu
2627-
} // namespace xe
2627+
} // namespace xe

src/xenia/cpu/backend/x64/x64_sequences.cc

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2401,7 +2401,7 @@ EMITTER_OPCODE_TABLE(OPCODE_RECIP, RECIP_F32, RECIP_F64, RECIP_V128);
24012401
// TODO(benvanik): use approx here:
24022402
// https://jrfonseca.blogspot.com/2008/09/fast-sse2-pow-tables-or-polynomials.html
24032403
struct POW2_F32 : Sequence<POW2_F32, I<OPCODE_POW2, F32Op, F32Op>> {
2404-
static __m128 EmulatePow2(void*, __m128 src) {
2404+
static __m128 EmulatePow2(void*, __m128& src) {
24052405
float src_value;
24062406
_mm_store_ss(&src_value, src);
24072407
float result = std::exp2(src_value);
@@ -2415,7 +2415,7 @@ struct POW2_F32 : Sequence<POW2_F32, I<OPCODE_POW2, F32Op, F32Op>> {
24152415
}
24162416
};
24172417
struct POW2_F64 : Sequence<POW2_F64, I<OPCODE_POW2, F64Op, F64Op>> {
2418-
static __m128d EmulatePow2(void*, __m128d src) {
2418+
static __m128d EmulatePow2(void*, __m128d& src) {
24192419
double src_value;
24202420
_mm_store_sd(&src_value, src);
24212421
double result = std::exp2(src_value);
@@ -2429,7 +2429,7 @@ struct POW2_F64 : Sequence<POW2_F64, I<OPCODE_POW2, F64Op, F64Op>> {
24292429
}
24302430
};
24312431
struct POW2_V128 : Sequence<POW2_V128, I<OPCODE_POW2, V128Op, V128Op>> {
2432-
static __m128 EmulatePow2(void*, __m128 src) {
2432+
static __m128 EmulatePow2(void*, __m128& src) {
24332433
alignas(16) float values[4];
24342434
_mm_store_ps(values, src);
24352435
for (size_t i = 0; i < 4; ++i) {
@@ -2452,7 +2452,7 @@ EMITTER_OPCODE_TABLE(OPCODE_POW2, POW2_F32, POW2_F64, POW2_V128);
24522452
// https://jrfonseca.blogspot.com/2008/09/fast-sse2-pow-tables-or-polynomials.html
24532453
// TODO(benvanik): this emulated fn destroys all xmm registers! don't do it!
24542454
struct LOG2_F32 : Sequence<LOG2_F32, I<OPCODE_LOG2, F32Op, F32Op>> {
2455-
static __m128 EmulateLog2(void*, __m128 src) {
2455+
static __m128 EmulateLog2(void*, __m128& src) {
24562456
float src_value;
24572457
_mm_store_ss(&src_value, src);
24582458
float result = std::log2(src_value);
@@ -2470,7 +2470,7 @@ struct LOG2_F32 : Sequence<LOG2_F32, I<OPCODE_LOG2, F32Op, F32Op>> {
24702470
}
24712471
};
24722472
struct LOG2_F64 : Sequence<LOG2_F64, I<OPCODE_LOG2, F64Op, F64Op>> {
2473-
static __m128d EmulateLog2(void*, __m128d src) {
2473+
static __m128d EmulateLog2(void*, __m128d& src) {
24742474
double src_value;
24752475
_mm_store_sd(&src_value, src);
24762476
double result = std::log2(src_value);
@@ -2488,7 +2488,7 @@ struct LOG2_F64 : Sequence<LOG2_F64, I<OPCODE_LOG2, F64Op, F64Op>> {
24882488
}
24892489
};
24902490
struct LOG2_V128 : Sequence<LOG2_V128, I<OPCODE_LOG2, V128Op, V128Op>> {
2491-
static __m128 EmulateLog2(void*, __m128 src) {
2491+
static __m128 EmulateLog2(void*, __m128& src) {
24922492
alignas(16) float values[4];
24932493
_mm_store_ps(values, src);
24942494
for (size_t i = 0; i < 4; ++i) {
@@ -2812,7 +2812,7 @@ struct SHL_V128 : Sequence<SHL_V128, I<OPCODE_SHL, V128Op, V128Op, I8Op>> {
28122812
e.CallNativeSafe(reinterpret_cast<void*>(EmulateShlV128));
28132813
e.vmovaps(i.dest, e.xmm0);
28142814
}
2815-
static __m128i EmulateShlV128(void*, __m128i src1, uint8_t src2) {
2815+
static __m128i EmulateShlV128(void*, __m128i& src1, uint8_t src2) {
28162816
// Almost all instances are shamt = 1, but non-constant.
28172817
// shamt is [0,7]
28182818
uint8_t shamt = src2 & 0x7;
@@ -2889,7 +2889,7 @@ struct SHR_V128 : Sequence<SHR_V128, I<OPCODE_SHR, V128Op, V128Op, I8Op>> {
28892889
e.CallNativeSafe(reinterpret_cast<void*>(EmulateShrV128));
28902890
e.vmovaps(i.dest, e.xmm0);
28912891
}
2892-
static __m128i EmulateShrV128(void*, __m128i src1, uint8_t src2) {
2892+
static __m128i EmulateShrV128(void*, __m128i& src1, uint8_t src2) {
28932893
// Almost all instances are shamt = 1, but non-constant.
28942894
// shamt is [0,7]
28952895
uint8_t shamt = src2 & 0x7;

0 commit comments

Comments
 (0)