Skip to content

[DeviceSanitizer] Check out-of-bounds on sycl::local_accessor #1532

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 98 additions & 29 deletions source/loader/layers/sanitizer/asan_interceptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@ namespace ur_sanitizer_layer {

namespace {

constexpr auto kSPIR_DeviceSanitizerReportMem = "__DeviceSanitizerReportMem";

uptr MemToShadow_CPU(uptr USM_SHADOW_BASE, uptr UPtr) {
return USM_SHADOW_BASE + (UPtr >> 3);
}
Expand Down Expand Up @@ -348,11 +346,14 @@ ur_result_t SanitizerInterceptor::releaseMemory(ur_context_handle_t Context,

ur_result_t SanitizerInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel,
ur_queue_handle_t Queue,
LaunchInfo &LaunchInfo) {
USMLaunchInfo &LaunchInfo) {
auto Context = GetContext(Queue);
auto Device = GetDevice(Queue);
auto ContextInfo = getContextInfo(Context);
auto DeviceInfo = getDeviceInfo(Device);
auto KernelInfo = getKernelInfo(Kernel);

UR_CALL(LaunchInfo.updateKernelInfo(*KernelInfo.get()));

ManagedQueue InternalQueue(Context, Device);
if (!InternalQueue) {
Expand All @@ -370,23 +371,12 @@ ur_result_t SanitizerInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel,

ur_result_t SanitizerInterceptor::postLaunchKernel(ur_kernel_handle_t Kernel,
ur_queue_handle_t Queue,
ur_event_handle_t &Event,
LaunchInfo &LaunchInfo) {
auto Program = GetProgram(Kernel);
ur_event_handle_t ReadEvent{};

// If kernel has defined SPIR_DeviceSanitizerReportMem, then we try to read it
// to host, but it's okay that it isn't defined
USMLaunchInfo &LaunchInfo) {
// FIXME: We must use block operation here, until we support urEventSetCallback
auto Result = context.urDdiTable.Enqueue.pfnDeviceGlobalVariableRead(
Queue, Program, kSPIR_DeviceSanitizerReportMem, true,
sizeof(LaunchInfo.SPIR_DeviceSanitizerReportMem), 0,
&LaunchInfo.SPIR_DeviceSanitizerReportMem, 1, &Event, &ReadEvent);
auto Result = context.urDdiTable.Queue.pfnFinish(Queue);

if (Result == UR_RESULT_SUCCESS) {
Event = ReadEvent;

const auto &AH = LaunchInfo.SPIR_DeviceSanitizerReportMem;
const auto &AH = LaunchInfo.Data->SanitizerReport;
if (!AH.Flag) {
return UR_RESULT_SUCCESS;
}
Expand Down Expand Up @@ -627,13 +617,44 @@ ur_result_t SanitizerInterceptor::eraseDevice(ur_device_handle_t Device) {
return UR_RESULT_SUCCESS;
}

ur_result_t SanitizerInterceptor::insertKernel(ur_kernel_handle_t Kernel) {
std::scoped_lock<ur_shared_mutex> Guard(m_KernelMapMutex);
if (m_KernelMap.find(Kernel) != m_KernelMap.end()) {
return UR_RESULT_SUCCESS;
}
m_KernelMap.emplace(Kernel, std::make_shared<KernelInfo>(Kernel));
return UR_RESULT_SUCCESS;
}

ur_result_t SanitizerInterceptor::eraseKernel(ur_kernel_handle_t Kernel) {
std::scoped_lock<ur_shared_mutex> Guard(m_KernelMapMutex);
assert(m_KernelMap.find(Kernel) != m_KernelMap.end());
m_KernelMap.erase(Kernel);
return UR_RESULT_SUCCESS;
}

ur_result_t SanitizerInterceptor::prepareLaunch(
ur_context_handle_t Context, std::shared_ptr<DeviceInfo> &DeviceInfo,
ur_queue_handle_t Queue, ur_kernel_handle_t Kernel,
LaunchInfo &LaunchInfo) {
USMLaunchInfo &LaunchInfo) {
auto Program = GetProgram(Kernel);

do {
// Set launch info argument
auto ArgNums = GetKernelNumArgs(Kernel);
if (ArgNums) {
context.logger.debug(
"launch_info {} (numLocalArgs={}, localArgs={})",
(void *)LaunchInfo.Data, LaunchInfo.Data->NumLocalArgs,
(void *)LaunchInfo.Data->LocalArgs);
ur_result_t URes = context.urDdiTable.Kernel.pfnSetArgPointer(
Kernel, ArgNums - 1, nullptr, &LaunchInfo.Data);
if (URes != UR_RESULT_SUCCESS) {
context.logger.error("Failed to set launch info: {}", URes);
return URes;
}
}

// Write global variable to program
auto EnqueueWriteGlobal = [Queue, Program](const char *Name,
const void *Value,
Expand Down Expand Up @@ -723,15 +744,17 @@ ur_result_t SanitizerInterceptor::prepareLaunch(
"LocalShadowMemorySize={})",
NumWG, LocalMemorySize, LocalShadowMemorySize);

UR_CALL(EnqueueAllocateDevice(LocalShadowMemorySize,
LaunchInfo.LocalShadowOffset));
UR_CALL(EnqueueAllocateDevice(
LocalShadowMemorySize, LaunchInfo.Data->LocalShadowOffset));

LaunchInfo.LocalShadowOffsetEnd =
LaunchInfo.LocalShadowOffset + LocalShadowMemorySize - 1;
LaunchInfo.Data->LocalShadowOffsetEnd =
LaunchInfo.Data->LocalShadowOffset + LocalShadowMemorySize -
1;

context.logger.info("ShadowMemory(Local, {} - {})",
(void *)LaunchInfo.LocalShadowOffset,
(void *)LaunchInfo.LocalShadowOffsetEnd);
context.logger.info(
"ShadowMemory(Local, {} - {})",
(void *)LaunchInfo.Data->LocalShadowOffset,
(void *)LaunchInfo.Data->LocalShadowOffsetEnd);
}
}
} while (false);
Expand All @@ -749,15 +772,61 @@ SanitizerInterceptor::findAllocInfoByAddress(uptr Address) {
return --It;
}

LaunchInfo::~LaunchInfo() {
ur_result_t USMLaunchInfo::initialize() {
UR_CALL(context.urDdiTable.Context.pfnRetain(Context));
UR_CALL(context.urDdiTable.Device.pfnRetain(Device));
UR_CALL(context.urDdiTable.USM.pfnSharedAlloc(
Context, Device, nullptr, nullptr, sizeof(LaunchInfo), (void **)&Data));
*Data = LaunchInfo{};
return UR_RESULT_SUCCESS;
}

ur_result_t USMLaunchInfo::updateKernelInfo(const KernelInfo &KI) {
auto NumArgs = KI.LocalArgs.size();
if (NumArgs) {
Data->NumLocalArgs = NumArgs;
UR_CALL(context.urDdiTable.USM.pfnSharedAlloc(
Context, Device, nullptr, nullptr, sizeof(LocalArgsInfo) * NumArgs,
(void **)&Data->LocalArgs));
uint32_t i = 0;
for (auto [ArgIndex, ArgInfo] : KI.LocalArgs) {
Data->LocalArgs[i++] = ArgInfo;
context.logger.debug(
"local_args (argIndex={}, size={}, sizeWithRZ={})", ArgIndex,
ArgInfo.Size, ArgInfo.SizeWithRedZone);
}
}
return UR_RESULT_SUCCESS;
}

USMLaunchInfo::~USMLaunchInfo() {
[[maybe_unused]] ur_result_t Result;
if (LocalShadowOffset) {
Result =
context.urDdiTable.USM.pfnFree(Context, (void *)LocalShadowOffset);
if (Data) {
auto Type = GetDeviceType(Device);
if (Type == DeviceType::GPU_PVC) {
if (Data->PrivateShadowOffset) {
Result = context.urDdiTable.USM.pfnFree(
Context, (void *)Data->PrivateShadowOffset);
assert(Result == UR_RESULT_SUCCESS);
}
if (Data->LocalShadowOffset) {
Result = context.urDdiTable.USM.pfnFree(
Context, (void *)Data->LocalShadowOffset);
assert(Result == UR_RESULT_SUCCESS);
}
}
if (Data->LocalArgs) {
Result = context.urDdiTable.USM.pfnFree(Context,
(void *)Data->LocalArgs);
assert(Result == UR_RESULT_SUCCESS);
}
Result = context.urDdiTable.USM.pfnFree(Context, (void *)Data);
assert(Result == UR_RESULT_SUCCESS);
}
Result = context.urDdiTable.Context.pfnRelease(Context);
assert(Result == UR_RESULT_SUCCESS);
Result = context.urDdiTable.Device.pfnRelease(Device);
assert(Result == UR_RESULT_SUCCESS);
}

} // namespace ur_sanitizer_layer
76 changes: 53 additions & 23 deletions source/loader/layers/sanitizer/asan_interceptor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,26 @@ struct QueueInfo {
}
};

struct KernelInfo {
ur_kernel_handle_t Handle;

ur_shared_mutex Mutex;
// Need preserve the order of local arguments
std::map<uint32_t, LocalArgsInfo> LocalArgs;

explicit KernelInfo(ur_kernel_handle_t Kernel) : Handle(Kernel) {
[[maybe_unused]] auto Result =
context.urDdiTable.Kernel.pfnRetain(Kernel);
assert(Result == UR_RESULT_SUCCESS);
}

~KernelInfo() {
[[maybe_unused]] auto Result =
context.urDdiTable.Kernel.pfnRelease(Handle);
assert(Result == UR_RESULT_SUCCESS);
}
};

Comment on lines +82 to +101
Copy link
Contributor Author

@AllanZyne AllanZyne Apr 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a note: KernelInfo related codes are duplicated with sycl::buffer PR #1533 (under review).

struct ContextInfo {
ur_context_handle_t Handle;

Expand Down Expand Up @@ -107,31 +127,30 @@ struct ContextInfo {
}
};

struct LaunchInfo {
uptr LocalShadowOffset = 0;
uptr LocalShadowOffsetEnd = 0;
DeviceSanitizerReport SPIR_DeviceSanitizerReportMem;
struct USMLaunchInfo {
LaunchInfo *Data;

ur_context_handle_t Context = nullptr;
ur_device_handle_t Device = nullptr;
const size_t *GlobalWorkSize = nullptr;
const size_t *GlobalWorkOffset = nullptr;
std::vector<size_t> LocalWorkSize;
uint32_t WorkDim = 0;

LaunchInfo(ur_context_handle_t Context, const size_t *GlobalWorkSize,
const size_t *LocalWorkSize, const size_t *GlobalWorkOffset,
uint32_t WorkDim)
: Context(Context), GlobalWorkSize(GlobalWorkSize),
USMLaunchInfo(ur_context_handle_t Context, ur_device_handle_t Device,
const size_t *GlobalWorkSize, const size_t *LocalWorkSize,
const size_t *GlobalWorkOffset, uint32_t WorkDim)
: Context(Context), Device(Device), GlobalWorkSize(GlobalWorkSize),
GlobalWorkOffset(GlobalWorkOffset), WorkDim(WorkDim) {
[[maybe_unused]] auto Result =
context.urDdiTable.Context.pfnRetain(Context);
assert(Result == UR_RESULT_SUCCESS);
if (LocalWorkSize) {
this->LocalWorkSize =
std::vector<size_t>(LocalWorkSize, LocalWorkSize + WorkDim);
}
}
~LaunchInfo();
~USMLaunchInfo();

ur_result_t initialize();
ur_result_t updateKernelInfo(const KernelInfo &KI);
};

struct DeviceGlobalInfo {
Expand All @@ -158,12 +177,11 @@ class SanitizerInterceptor {

ur_result_t preLaunchKernel(ur_kernel_handle_t Kernel,
ur_queue_handle_t Queue,
LaunchInfo &LaunchInfo);
USMLaunchInfo &LaunchInfo);

ur_result_t postLaunchKernel(ur_kernel_handle_t Kernel,
ur_queue_handle_t Queue,
ur_event_handle_t &Event,
LaunchInfo &LaunchInfo);
USMLaunchInfo &LaunchInfo);

ur_result_t insertContext(ur_context_handle_t Context,
std::shared_ptr<ContextInfo> &CI);
Expand All @@ -173,6 +191,9 @@ class SanitizerInterceptor {
std::shared_ptr<DeviceInfo> &CI);
ur_result_t eraseDevice(ur_device_handle_t Device);

ur_result_t insertKernel(ur_kernel_handle_t Kernel);
ur_result_t eraseKernel(ur_kernel_handle_t Kernel);

std::optional<AllocationIterator> findAllocInfoByAddress(uptr Address);

std::shared_ptr<ContextInfo> getContextInfo(ur_context_handle_t Context) {
Expand All @@ -181,6 +202,18 @@ class SanitizerInterceptor {
return m_ContextMap[Context];
}

std::shared_ptr<DeviceInfo> getDeviceInfo(ur_device_handle_t Device) {
std::shared_lock<ur_shared_mutex> Guard(m_DeviceMapMutex);
assert(m_DeviceMap.find(Device) != m_DeviceMap.end());
return m_DeviceMap[Device];
}

std::shared_ptr<KernelInfo> getKernelInfo(ur_kernel_handle_t Kernel) {
std::shared_lock<ur_shared_mutex> Guard(m_KernelMapMutex);
assert(m_KernelMap.find(Kernel) != m_KernelMap.end());
return m_KernelMap[Kernel];
}

private:
ur_result_t updateShadowMemory(std::shared_ptr<ContextInfo> &ContextInfo,
std::shared_ptr<DeviceInfo> &DeviceInfo,
Expand All @@ -195,26 +228,23 @@ class SanitizerInterceptor {
std::shared_ptr<DeviceInfo> &DeviceInfo,
ur_queue_handle_t Queue,
ur_kernel_handle_t Kernel,
LaunchInfo &LaunchInfo);
USMLaunchInfo &LaunchInfo);

ur_result_t allocShadowMemory(ur_context_handle_t Context,
std::shared_ptr<DeviceInfo> &DeviceInfo);

std::shared_ptr<DeviceInfo> getDeviceInfo(ur_device_handle_t Device) {
std::shared_lock<ur_shared_mutex> Guard(m_DeviceMapMutex);
assert(m_DeviceMap.find(Device) != m_DeviceMap.end());
return m_DeviceMap[Device];
}

private:
std::unordered_map<ur_context_handle_t, std::shared_ptr<ContextInfo>>
m_ContextMap;
ur_shared_mutex m_ContextMapMutex;

std::unordered_map<ur_device_handle_t, std::shared_ptr<DeviceInfo>>
m_DeviceMap;
ur_shared_mutex m_DeviceMapMutex;

std::unordered_map<ur_kernel_handle_t, std::shared_ptr<KernelInfo>>
m_KernelMap;
ur_shared_mutex m_KernelMapMutex;

/// Assumption: all USM chunks are allocated in one VA
AllocationMap m_AllocationMap;
ur_shared_mutex m_AllocationMapMutex;
Expand Down
17 changes: 17 additions & 0 deletions source/loader/layers/sanitizer/asan_libdevice.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,23 @@ struct DeviceSanitizerReport {
bool IsRecover = false;
};

struct LocalArgsInfo {
uint64_t Size = 0;
uint64_t SizeWithRedZone = 0;
};

struct LaunchInfo {
uintptr_t PrivateShadowOffset =
0; // don't move this field, we use it in AddressSanitizerPass
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to help me better understand the above comment, can we move other fields?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, you can reorder other fields except PrivateShadowOffset, because we assume PrivateShadowOffset is the first field in ASanPass.


uintptr_t LocalShadowOffset = 0;
uintptr_t LocalShadowOffsetEnd = 0;
DeviceSanitizerReport SanitizerReport;

uint32_t NumLocalArgs = 0;
LocalArgsInfo *LocalArgs = nullptr; // ordered by ArgIndex
};

constexpr unsigned ASAN_SHADOW_SCALE = 3;
constexpr unsigned ASAN_SHADOW_GRANULARITY = 1ULL << ASAN_SHADOW_SCALE;

Expand Down
Loading