Skip to content

Use mutex to ensure segment update and CRA read/write happens atomically #363

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 18 additions & 5 deletions include/acl_kernel_if.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "acl_hal.h"
#include "acl_types.h"

#include <mutex>
#include <optional>
#include <string>
#include <vector>
Expand All @@ -32,10 +33,19 @@ typedef struct {

// Accelerator details
unsigned int num_accel;
std::vector<std::vector<int>>
accel_job_ids; //[num_accel][accel_invoc_queue_depth]

// Circular buffer that implements hardware kernel invocation queue
// size: [num_accel][accel_invoc_queue_depth]
std::vector<std::vector<int>> accel_job_ids;
// Depth of hardware kernel invocation queue [num_accel]
std::vector<unsigned int> accel_invoc_queue_depth;
// Points to the last kernel that has been launched but not yet finished
// [num_accel]
std::vector<int> accel_queue_front;
// Points to the last empty slot on hardware kernel invocation queue
// where kernel at the next index is the next one to finish [num_accel]
std::vector<int> accel_queue_back;

std::vector<acl_kernel_if_addr_range> accel_csr;
std::vector<acl_kernel_if_addr_range> accel_perf_mon;
std::vector<unsigned int> accel_num_printfs;
Expand All @@ -51,6 +61,12 @@ typedef struct {

acl_bsp_io io;

// Acquired when any thread is trying to perform a CRA non-ROM read or write.
// This ensures that the CRA segment register write and the subsequent data
// read or write happen atomically, so the data access goes to the CRA of
// the intended kernel.
std::mutex segment_mutex;

// csr_version is absent if there is no accelerators or cra_ring_root doesn't
// exist
std::optional<unsigned int> csr_version;
Expand All @@ -74,9 +90,6 @@ typedef struct {
// e.g., in a future code refactoring.
bool cra_ring_root_exist = false;

// Depth of hardware kernel invocation queue
std::vector<unsigned int> accel_invoc_queue_depth;

// Track which of the kernels is the autorun profiling kernel (-1 if none)
int autorun_profiling_kernel_id;

Expand Down
21 changes: 7 additions & 14 deletions include/acl_thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,20 +67,13 @@ static inline void acl_sig_finished() {
}

// Blocking/Unblocking signals (Only implemented for Linux)
#ifdef __linux__
extern ACL_TLS sigset_t acl_sigset;
static inline void acl_sig_block_signals() {
sigset_t mask;
if (sigfillset(&mask))
assert(0 && "Error in creating signal mask in status handler");
if (pthread_sigmask(SIG_BLOCK, &mask, &acl_sigset))
assert(0 && "Error in blocking signals in status handler");
}
static inline void acl_sig_unblock_signals() {
if (pthread_sigmask(SIG_SETMASK, &acl_sigset, NULL))
assert(0 && "Error in unblocking signals in status handler");
}
#endif
// RAII guard over the thread's signal mask: the constructor blocks signals
// (implemented for Linux only; a no-op elsewhere) and the destructor restores
// the previously saved mask, so every exit path unblocks automatically.
class acl_signal_blocker {
public:
  acl_signal_blocker();
  ~acl_signal_blocker();

  // Non-copyable: duplicating the guard would make mask restoration ambiguous.
  acl_signal_blocker(const acl_signal_blocker &) = delete;
  acl_signal_blocker &operator=(const acl_signal_blocker &) = delete;
};

// -- global lock functions --

Expand Down
45 changes: 15 additions & 30 deletions src/acl_hal_mmd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1985,13 +1985,13 @@ int acl_hal_mmd_program_device(unsigned int physical_device_id,

void acl_hal_mmd_kernel_interrupt(int handle_in, void *user_data) {
unsigned physical_device_id;
#ifdef __linux__

// Callbacks received from non-dma transfers.
// (those calls are not initiated by a signal handler, so we need to block all
// signals here to avoid simultaneous calls to signal handler.)
acl_sig_block_signals(); // Call before acl_sig_started. Must call
// acl_sig_unblock_signals after acl_sig_finished.
#endif
// Must instantiate before acl_sig_started, destruct after acl_sig_finished.
acl_signal_blocker sig_blocker;

acl_sig_started();
// NOTE: all exit points of this function must first call acl_sig_finished()

Expand All @@ -2005,10 +2005,6 @@ void acl_hal_mmd_kernel_interrupt(int handle_in, void *user_data) {
assert(acl_kernel_if_is_valid(&kern[physical_device_id]));
acl_kernel_if_update_status(&kern[physical_device_id]);
acl_sig_finished();
#ifdef __linux__
// Unblocking the signals we blocked
acl_sig_unblock_signals();
#endif
return;
}

Expand All @@ -2021,14 +2017,13 @@ void acl_hal_mmd_device_interrupt(int handle_in,
aocl_mmd_interrupt_info *data_in,
void *user_data) {
unsigned physical_device_id;
#ifdef __linux__

// Callbacks received from non-dma transfers.
//(those calls are not initiated by a signal handler, so we need to block all
// signals
// here to avoid simultaneous calls to signal handler.)
acl_sig_block_signals(); // Call before acl_sig_started. Must call
// acl_sig_unblock_signals after acl_sig_finished.
#endif
// (those calls are not initiated by a signal handler, so we need to block all
// signals here to avoid simultaneous calls to signal handler.)
// Must instantiate before acl_sig_started, destruct after acl_sig_finished.
acl_signal_blocker sig_blocker;

acl_sig_started();
// NOTE: all exit points of this function must first call acl_sig_finished()

Expand All @@ -2042,10 +2037,6 @@ void acl_hal_mmd_device_interrupt(int handle_in,
acl_device_update_fn(physical_device_id, data_in->exception_type,
data_in->user_private_info, data_in->user_cb);
acl_sig_finished();
#ifdef __linux__
// Unblocking the signals we blocked
acl_sig_unblock_signals();
#endif
return;
}

Expand All @@ -2056,14 +2047,12 @@ void acl_hal_mmd_device_interrupt(int handle_in,

void acl_hal_mmd_status_handler(int handle, void *user_data, aocl_mmd_op_t op,
int status) {
#ifdef __linux__
// Callbacks received from non-dma transfers.
//(those calls are not initiated by a signal handler, so we need to block all
// signals)
// here to avoid simultaneous calls to signal handler.)
acl_sig_block_signals(); // Call before acl_sig_started. Must call
// acl_sig_unblock_signals after acl_sig_finished.
#endif
// (those calls are not initiated by a signal handler, so we need to block all
// signals here to avoid simultaneous calls to signal handler.)
// Must instantiate before acl_sig_started, destruct after acl_sig_finished.
acl_signal_blocker sig_blocker;

acl_sig_started();
// NOTE: all exit points of this function must first call acl_sig_finished()
// Removing Windows warning
Expand All @@ -2073,10 +2062,6 @@ void acl_hal_mmd_status_handler(int handle, void *user_data, aocl_mmd_op_t op,
acl_event_update_fn((cl_event)op, CL_COMPLETE);

acl_sig_finished();
#ifdef __linux__
// Unblocking the signals we blocked
acl_sig_unblock_signals();
#endif
}

void acl_hal_mmd_register_callbacks(
Expand Down
43 changes: 25 additions & 18 deletions src/acl_kernel_if.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,11 @@ static uintptr_t acl_kernel_cra_set_segment_rom(acl_kernel_if *kern,
static int acl_kernel_cra_read(acl_kernel_if *kern, unsigned int accel_id,
unsigned int addr, unsigned int *val) {
assert(kern->cra_ring_root_exist);

// Need to block signals before acquiring mutex and unblock after releasing
acl_signal_blocker sig_blocker;
std::lock_guard<std::mutex> lock(kern->segment_mutex);

uintptr_t segment_offset = acl_kernel_cra_set_segment(kern, accel_id, addr);
acl_assert_locked_or_sig();
return acl_kernel_if_read_32b(
Expand All @@ -410,6 +415,11 @@ static int acl_kernel_cra_read(acl_kernel_if *kern, unsigned int accel_id,
int acl_kernel_cra_read_64b(acl_kernel_if *kern, unsigned int accel_id,
unsigned int addr, uint64_t *val) {
assert(kern->cra_ring_root_exist);

// Need to block signals before acquiring mutex and unblock after releasing
acl_signal_blocker sig_blocker;
std::lock_guard<std::mutex> lock(kern->segment_mutex);

uintptr_t segment_offset = acl_kernel_cra_set_segment(kern, accel_id, addr);
acl_assert_locked_or_sig();
return acl_kernel_if_read_64b(
Expand Down Expand Up @@ -462,6 +472,11 @@ static int acl_kernel_rom_cra_read_block(acl_kernel_if *kern, unsigned int addr,
static int acl_kernel_cra_write(acl_kernel_if *kern, unsigned int accel_id,
unsigned int addr, unsigned int val) {
assert(kern->cra_ring_root_exist);

// Need to block signals before acquiring mutex and unblock after releasing
acl_signal_blocker sig_blocker;
std::lock_guard<std::mutex> lock(kern->segment_mutex);

uintptr_t segment_offset = acl_kernel_cra_set_segment(kern, accel_id, addr);
acl_assert_locked_or_sig();
return acl_kernel_if_write_32b(
Expand All @@ -471,6 +486,11 @@ static int acl_kernel_cra_write(acl_kernel_if *kern, unsigned int accel_id,
static int acl_kernel_cra_write_64b(acl_kernel_if *kern, unsigned int accel_id,
unsigned int addr, uint64_t val) {
assert(kern->cra_ring_root_exist);

// Need to block signals before acquiring mutex and unblock after releasing
acl_signal_blocker sig_blocker;
std::lock_guard<std::mutex> lock(kern->segment_mutex);

uintptr_t segment_offset = acl_kernel_cra_set_segment(kern, accel_id, addr);
acl_assert_locked();
return acl_kernel_if_write_64b(
Expand All @@ -481,6 +501,11 @@ static int acl_kernel_cra_write_block(acl_kernel_if *kern,
unsigned int accel_id, unsigned int addr,
unsigned int *val, size_t size) {
assert(kern->cra_ring_root_exist);

// Need to block signals before acquiring mutex and unblock after releasing
acl_signal_blocker sig_blocker;
std::lock_guard<std::mutex> lock(kern->segment_mutex);

uintptr_t segment_offset = acl_kernel_cra_set_segment(kern, accel_id, addr);
uintptr_t logical_addr =
kern->accel_csr[accel_id].address + addr - OFFSET_KERNEL_CRA;
Expand Down Expand Up @@ -1470,16 +1495,6 @@ void acl_kernel_if_update_status(acl_kernel_if *kern) {
ACL_KERNEL_IF_DEBUG_MSG_VERBOSE(kern, 5, ":: Updating kernel status.\n");
#endif

// Get the state of kernel_cra address span extender segment prior to IRQ in
// hardware If IRQ is received in middle of segment change, segment value in
// cache and hardware could go out of sync
unsigned int segment;
acl_kernel_if_read_32b(kern, OFFSET_KERNEL_CRA_SEGMENT, &segment);

// Zero upper 32-bits on 64-bit machines
kern->cur_segment = segment & 0xffffffff;
uintptr_t segment_pre_irq = kern->cur_segment;

// Check which accelerators are done and update their status appropriately
for (unsigned int accel_id = 0; accel_id < kern->num_accel; ++accel_id) {
int next_queue_back;
Expand Down Expand Up @@ -1552,14 +1567,6 @@ void acl_kernel_if_update_status(acl_kernel_if *kern) {
}
}
}

// Restore value of kernel cra address span extender segment to that of prior
// to IRQ
if (kern->cur_segment != segment_pre_irq) {
acl_kernel_if_write_32b(kern, OFFSET_KERNEL_CRA_SEGMENT,
(unsigned int)segment_pre_irq);
kern->cur_segment = segment_pre_irq;
}
}

void acl_kernel_if_debug_dump_printf(acl_kernel_if *kern, unsigned k) {
Expand Down
21 changes: 21 additions & 0 deletions src/acl_thread.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (C) 2015-2021 Intel Corporation

Check warning on line 1 in src/acl_thread.cpp

View workflow job for this annotation

GitHub Actions / build

Coverage

56.9% (-13.3%)
// SPDX-License-Identifier: BSD-3-Clause

// External library headers.
Expand Down Expand Up @@ -135,6 +135,27 @@

#endif // !LINUX

// Blocking/Unblocking signals (Actual implementation only for Linux)
// Constructor: block every signal for the calling thread, saving the thread's
// previous mask into acl_sigset so the destructor can restore it.
// Implemented for Linux only; a no-op on other platforms.
acl_signal_blocker::acl_signal_blocker() {
#ifdef __linux__
  // Build a mask covering all signals, then install it with SIG_BLOCK while
  // capturing the prior mask for later restoration.
  sigset_t all_signals;
  if (sigfillset(&all_signals) != 0)
    assert(0 && "Error in creating signal mask in status handler");
  if (pthread_sigmask(SIG_BLOCK, &all_signals, &acl_sigset) != 0)
    assert(0 && "Error in blocking signals in status handler");
#endif
}

// Destructor: reinstate the signal mask that was saved into acl_sigset when
// this guard was constructed. Implemented for Linux only; a no-op elsewhere.
acl_signal_blocker::~acl_signal_blocker() {
#ifdef __linux__
  if (pthread_sigmask(SIG_SETMASK, &acl_sigset, NULL) != 0)
    assert(0 && "Error in unblocking signals in status handler");
#endif
}

// Current thread releases mutex lock and sleeps briefly to allow other threads
// a chance to execute. This function is useful for multithreaded hosts with
// e.g. polling BSPs (using yield) to prevent one thread from hogging the mutex
Expand Down
Loading