Skip to content

Commit defa810

Browse files
committed
[Memory] Fix alignment to 16 bytes
Fix a segmentation fault on Linux that occurs when `src` or `dst` is not aligned to 16 bytes. Assert that `src_ptr` and `dest_ptr` are misaligned by the same offset from a 16-byte boundary (i.e. the low four bits of both addresses match). Handle all leading words/dwords that precede the first 16-byte boundary.
1 parent 7e1c04f commit defa810

File tree

1 file changed

+14
-3
lines changed

1 file changed

+14
-3
lines changed

src/xenia/base/memory.cc

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -27,14 +27,18 @@ void copy_128_aligned(void* dest, const void* src, size_t count) {
2727
void copy_and_swap_16_aligned(void* dest_ptr, const void* src_ptr,
2828
size_t count) {
2929
assert_zero(reinterpret_cast<uintptr_t>(src_ptr) & 0x1);
30+
assert_true((reinterpret_cast<uintptr_t>(src_ptr) & 0xF) ==
31+
(reinterpret_cast<uintptr_t>(dest_ptr) & 0xF));
3032
auto dest = reinterpret_cast<uint16_t*>(dest_ptr);
3133
auto src = reinterpret_cast<const uint16_t*>(src_ptr);
3234
__m128i shufmask =
3335
_mm_set_epi8(0x0E, 0x0F, 0x0C, 0x0D, 0x0A, 0x0B, 0x08, 0x09, 0x06, 0x07,
3436
0x04, 0x05, 0x02, 0x03, 0x00, 0x01);
3537

3638
size_t i = 0;
37-
size_t unaligned_words = (reinterpret_cast<uintptr_t>(src_ptr) & 0xF) / 2;
39+
size_t unaligned_words =
40+
((0x10 - (reinterpret_cast<uintptr_t>(src_ptr) & 0xF)) & 0xF) /
41+
sizeof(uint16_t);
3842
for (; unaligned_words > 0 && i < count; unaligned_words--, i++) {
3943
// Copy up to 16 byte alignment.
4044
dest[i] = byte_swap(src[i]);
@@ -71,14 +75,18 @@ void copy_and_swap_16_unaligned(void* dest_ptr, const void* src_ptr,
7175
void copy_and_swap_32_aligned(void* dest_ptr, const void* src_ptr,
7276
size_t count) {
7377
assert_zero(reinterpret_cast<uintptr_t>(src_ptr) & 0x3);
78+
assert_true((reinterpret_cast<uintptr_t>(src_ptr) & 0xF) ==
79+
(reinterpret_cast<uintptr_t>(dest_ptr) & 0xF));
7480
auto dest = reinterpret_cast<uint32_t*>(dest_ptr);
7581
auto src = reinterpret_cast<const uint32_t*>(src_ptr);
7682
__m128i shufmask =
7783
_mm_set_epi8(0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B, 0x04, 0x05,
7884
0x06, 0x07, 0x00, 0x01, 0x02, 0x03);
7985

8086
size_t i = 0;
81-
size_t unaligned_dwords = (reinterpret_cast<uintptr_t>(src_ptr) & 0xF) / 4;
87+
size_t unaligned_dwords =
88+
((0x10 - (reinterpret_cast<uintptr_t>(src_ptr) & 0xF)) & 0xF) /
89+
sizeof(uint32_t);
8290
for (; unaligned_dwords > 0 && i < count; unaligned_dwords--, i++) {
8391
// Copy up to 16 byte alignment.
8492
dest[i] = byte_swap(src[i]);
@@ -115,14 +123,17 @@ void copy_and_swap_32_unaligned(void* dest_ptr, const void* src_ptr,
115123
void copy_and_swap_64_aligned(void* dest_ptr, const void* src_ptr,
116124
size_t count) {
117125
assert_zero(reinterpret_cast<uintptr_t>(src_ptr) & 0x7);
126+
assert_true((reinterpret_cast<uintptr_t>(src_ptr) & 0xF) ==
127+
(reinterpret_cast<uintptr_t>(dest_ptr) & 0xF));
118128
auto dest = reinterpret_cast<uint64_t*>(dest_ptr);
119129
auto src = reinterpret_cast<const uint64_t*>(src_ptr);
120130
__m128i shufmask =
121131
_mm_set_epi8(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00, 0x01,
122132
0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
123133

124134
size_t i = 0;
125-
size_t unaligned_qwords = (reinterpret_cast<uintptr_t>(src_ptr) & 0xF) / 8;
135+
size_t unaligned_qwords =
136+
(reinterpret_cast<uintptr_t>(src_ptr) & 0xF) / sizeof(uint64_t);
126137
for (; unaligned_qwords > 0 && i < count; unaligned_qwords--, i++) {
127138
// Copy up to 16 byte alignment.
128139
dest[i] = byte_swap(src[i]);

0 commit comments

Comments
 (0)