Skip to content

Commit 874d3f0

Browse files
committed
[PCUDA] Use non-managed memory due to atomic issue on AMD with managed memory
1 parent 72e316b commit 874d3f0

File tree

1 file changed

+23
-9
lines changed

1 file changed

+23
-9
lines changed

tests/pcuda/atomic.cpp

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -90,22 +90,29 @@ using min_test_types =
9090

9191
template<class T>
9292
void test_atomic_min() {
93+
// atomicMin/Max on ROCm seem to be bugged when managed memory is used.
94+
// Use device memory instead.
9395
T* data;
9496
const size_t problem_size = 16;
9597
const T offset = T(1024*1024);
96-
BOOST_CHECK(pcudaMallocManaged(&data, (problem_size + 1) * sizeof(T)) ==
98+
BOOST_CHECK(pcudaMalloc(&data, (problem_size + 1) * sizeof(T)) ==
9799
pcudaSuccess);
100+
std::vector<T> input(problem_size+1);
98101
for(int i = 0; i < problem_size+1; ++i)
99-
data[i] = T(i);
100-
data[0] = offset;
102+
input[i] = T(i);
103+
input[0] = offset;
104+
BOOST_CHECK(pcudaMemcpy(data, input.data(), input.size() * sizeof(T),
105+
pcudaMemcpyDefault) == pcudaSuccess);
101106

102107
pcudaParallelFor(1, problem_size, [=](){
103108
int gid = threadIdx.x;
104109
atomicMin(data, data[gid+1]);
105110
});
106111
BOOST_CHECK(pcudaDeviceSynchronize() == pcudaSuccess);
107-
std::cout << *data << std::endl;
108-
BOOST_CHECK(*data == 1);
112+
BOOST_CHECK(pcudaMemcpy(input.data(), data, input.size() * sizeof(T),
113+
pcudaMemcpyDefault) == pcudaSuccess);
114+
115+
BOOST_CHECK(input[0] == 1);
109116

110117
BOOST_CHECK(pcudaFree(data) == pcudaSuccess);
111118
}
@@ -120,21 +127,28 @@ using max_test_types =
120127

121128
template<class T>
122129
void test_atomic_max() {
130+
// atomicMin/Max on ROCm seem to be bugged when managed memory is used.
131+
// Use device memory instead.
123132
T* data;
124133
const size_t problem_size = 16;
125134
const T offset = T(2);
126-
BOOST_CHECK(pcudaMallocManaged(&data, (problem_size + 1) * sizeof(T)) ==
135+
BOOST_CHECK(pcudaMalloc(&data, (problem_size + 1) * sizeof(T)) ==
127136
pcudaSuccess);
137+
std::vector<T> input(problem_size+1);
128138
for(int i = 0; i < problem_size+1; ++i)
129-
data[i] = T(i);
130-
data[0] = offset;
139+
input[i] = T(i);
140+
input[0] = offset;
141+
BOOST_CHECK(pcudaMemcpy(data, input.data(), input.size() * sizeof(T),
142+
pcudaMemcpyDefault) == pcudaSuccess);
131143

132144
pcudaParallelFor(1, problem_size, [=](){
133145
int gid = threadIdx.x;
134146
atomicMax(data, data[gid+1]);
135147
});
136148
BOOST_CHECK(pcudaDeviceSynchronize() == pcudaSuccess);
137-
BOOST_CHECK(*data == problem_size);
149+
BOOST_CHECK(pcudaMemcpy(input.data(), data, input.size() * sizeof(T),
150+
pcudaMemcpyDefault) == pcudaSuccess);
151+
BOOST_CHECK(input[0] == problem_size);
138152

139153
BOOST_CHECK(pcudaFree(data) == pcudaSuccess);
140154
}

0 commit comments

Comments
 (0)