Skip to content

Commit 2253b3c

Browse files
author
Chris Sullivan
committed
After profiling, I found that a lot of extra time was spent in Finish while waiting for the atomic_int to be updated to signal that the work was done. I therefore switched back to using promises, this time using a move_on_copy template to pass the promise into the task. This yields a dramatic performance increase, especially for the LFThreadPool. A few initial tests show it running the fastest.
1 parent 1a130b1 commit 2253b3c

File tree

2 files changed

+10
-9
lines changed

2 files changed

+10
-9
lines changed

include/ThreadPoolBase.hh

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <vector>
99
#include <utility>
1010
#include <future>
11+
#include "move_on_copy.hh"
1112

1213
using namespace std;
1314

@@ -22,31 +23,34 @@ public:
2223
virtual void Worker() = 0;
2324
virtual void CleanUp() { m_stopWorkers = true; }
2425
void JoinAll();
25-
void Finish();
2626

2727
template <typename T, typename... Params>
2828
void ParallelFor(uint32_t begin, uint32_t end, uint32_t n_tasks, T SerialFunction, Params&&... params) {
2929

3030
n_tasks = (n_tasks >= m_workers.size()) ? n_tasks : m_workers.size();
31-
m_tasksRemaining = n_tasks;
31+
std::future<void> futures[n_tasks];
3232
int chunk = (end - begin) / n_tasks;
3333
for (auto i = 0u; i < n_tasks; ++i) {
34-
AddTask([=]{
34+
std::promise<void> promise;
35+
futures[i] = promise.get_future();
36+
auto move_promise = make_move_on_copy(move(promise));
37+
AddTask([=] () mutable {
3538
uint32_t threadstart = begin + i*chunk;
3639
uint32_t threadstop = (i == n_tasks - 1) ? end : threadstart + chunk;
3740
for (uint32_t it = threadstart; it < threadstop; ++it) {
3841
SerialFunction(it, params...);
3942
}
40-
m_tasksRemaining--;
43+
move_promise.value().set_value();
4144
});
4245
}
43-
Finish();
46+
for (auto i = 0u; i<n_tasks; i++) {
47+
futures[i].get();
48+
}
4449
}
4550

4651
protected:
4752
// threads and task queue
4853
vector<thread> m_workers;
49-
atomic<int> m_tasksRemaining;
5054
bool m_stopWorkers;
5155
};
5256

src/ThreadPoolBase.cc

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,3 @@
33
void ThreadPoolBase::JoinAll() {
44
for (auto& worker : m_workers) { worker.join(); }
55
}
6-
void ThreadPoolBase::Finish() {
7-
while (m_tasksRemaining > 0) {}
8-
}

0 commit comments

Comments
 (0)