Skip to content

[DeviceSanitizer] Support detecting out-of-bounds errors on sycl::buffer #1533

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
May 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions source/loader/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ if(UR_ENABLE_SANITIZER)
${CMAKE_CURRENT_SOURCE_DIR}/../ur/ur.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_allocator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_allocator.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_buffer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_buffer.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_interceptor.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_interceptor.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_libdevice.hpp
Expand Down
137 changes: 137 additions & 0 deletions source/loader/layers/sanitizer/asan_buffer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
/*
*
* Copyright (C) 2024 Intel Corporation
*
* Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
* See LICENSE.TXT
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
* @file asan_buffer.cpp
*
*/

#include "asan_buffer.hpp"
#include "asan_interceptor.hpp"
#include "ur_sanitizer_layer.hpp"
#include "ur_sanitizer_utils.hpp"

namespace ur_sanitizer_layer {

ur_result_t EnqueueMemCopyRectHelper(
ur_queue_handle_t Queue, char *pSrc, char *pDst, ur_rect_offset_t SrcOffset,
ur_rect_offset_t DstOffset, ur_rect_region_t Region, size_t SrcRowPitch,
size_t SrcSlicePitch, size_t DstRowPitch, size_t DstSlicePitch,
bool Blocking, uint32_t NumEventsInWaitList,
const ur_event_handle_t *EventWaitList, ur_event_handle_t *Event) {
// If user doesn't determine src/dst row pitch and slice pitch, just use
// region for it.
if (SrcRowPitch == 0) {
SrcRowPitch = Region.width;
}

if (SrcSlicePitch == 0) {
SrcSlicePitch = SrcRowPitch * Region.height;
}

if (DstRowPitch == 0) {
DstRowPitch = Region.width;
}

if (DstSlicePitch == 0) {
DstSlicePitch = DstRowPitch * Region.height;
}

// Calculate the src and dst addresses that actually will be copied.
char *SrcOrigin = pSrc + SrcOffset.x + SrcRowPitch * SrcOffset.y +
SrcSlicePitch * SrcOffset.z;
char *DstOrigin = pDst + DstOffset.x + DstRowPitch * DstOffset.y +
DstSlicePitch * DstOffset.z;

std::vector<ur_event_handle_t> Events;
Events.reserve(Region.depth);
// For now, USM doesn't support 3D memory copy operation, so we can only
// loop call 2D memory copy function to implement it.
for (size_t i = 0; i < Region.depth; i++) {
ur_event_handle_t NewEvent{};
UR_CALL(context.urDdiTable.Enqueue.pfnUSMMemcpy2D(
Queue, Blocking, DstOrigin + (i * DstSlicePitch), DstRowPitch,
SrcOrigin + (i * SrcSlicePitch), SrcRowPitch, Region.width,
Region.height, NumEventsInWaitList, EventWaitList, &NewEvent));

Events.push_back(NewEvent);
}

UR_CALL(context.urDdiTable.Enqueue.pfnEventsWait(Queue, Events.size(),
Events.data(), Event));

return UR_RESULT_SUCCESS;
}

ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) {
// Sub-buffers don't maintain own allocations but rely on parent buffer.
if (SubBuffer) {
UR_CALL(SubBuffer->Parent->getHandle(Device, Handle));
Handle += SubBuffer->Origin;
return UR_RESULT_SUCCESS;
}

auto &Allocation = Allocations[Device];
if (!Allocation) {
ur_usm_desc_t USMDesc{};
USMDesc.align = getAlignment();
ur_usm_pool_handle_t Pool{};
ur_result_t URes = context.interceptor->allocateMemory(
Context, Device, &USMDesc, Pool, Size, AllocType::MEM_BUFFER,
ur_cast<void **>(&Allocation));
if (URes != UR_RESULT_SUCCESS) {
context.logger.error(
"Failed to allocate {} bytes memory for buffer {}", Size, this);
return URes;
}

if (HostPtr) {
ManagedQueue Queue(Context, Device);
URes = context.urDdiTable.Enqueue.pfnUSMMemcpy(
Queue, true, Allocation, HostPtr, Size, 0, nullptr, nullptr);
if (URes != UR_RESULT_SUCCESS) {
context.logger.error("Failed to copy {} bytes data from host "
"pointer {} to buffer {}",
Size, HostPtr, this);
return URes;
}
}
}

Handle = Allocation;

return UR_RESULT_SUCCESS;
}

ur_result_t MemBuffer::free() {
for (const auto &[_, Ptr] : Allocations) {
ur_result_t URes = context.interceptor->releaseMemory(Context, Ptr);
if (URes != UR_RESULT_SUCCESS) {
context.logger.error("Failed to free buffer handle {}", Ptr);
return URes;
}
}
Allocations.clear();
return UR_RESULT_SUCCESS;
}

size_t MemBuffer::getAlignment() {
// Choose an alignment that is at most 128 and is the next power of 2
// for sizes less than 128.
// TODO: If we don't set the alignment size explicitly, the device will
// usually choose a very large size (more than 1k). Then sanitizer will
// allocate extra unnessary memory. Not sure if this will impact
// performance.
size_t MsbIdx = 63 - __builtin_clz(Size);
size_t Alignment = (1 << (MsbIdx + 1));
if (Alignment > 128) {
Alignment = 128;
}
return Alignment;
}

} // namespace ur_sanitizer_layer
74 changes: 74 additions & 0 deletions source/loader/layers/sanitizer/asan_buffer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*
*
* Copyright (C) 2024 Intel Corporation
*
* Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
* See LICENSE.TXT
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
* @file asan_buffer.hpp
*
*/

#pragma once

#include <atomic>
#include <memory>
#include <optional>

#include "common.hpp"

namespace ur_sanitizer_layer {

struct MemBuffer {
// Buffer constructor
MemBuffer(ur_context_handle_t Context, size_t Size, char *HostPtr)
: Context(Context), Size(Size), HostPtr(HostPtr) {}

// Sub-buffer constructor
MemBuffer(std::shared_ptr<MemBuffer> Parent, size_t Origin, size_t Size)
: Context(Parent->Context), Size(Size), SubBuffer{{Parent, Origin}} {}

ur_result_t getHandle(ur_device_handle_t Device, char *&Handle);

ur_result_t free();

size_t getAlignment();

std::unordered_map<ur_device_handle_t, char *> Allocations;

enum AccessMode { UNKNOWN, READ_WRITE, READ_ONLY, WRITE_ONLY };

struct Mapping {
size_t Offset;
size_t Size;
};

std::unordered_map<void *, Mapping> Mappings;

ur_context_handle_t Context;

size_t Size;

char *HostPtr{};

struct SubBuffer_t {
std::shared_ptr<MemBuffer> Parent;
size_t Origin;
};

std::optional<SubBuffer_t> SubBuffer;

std::atomic<int32_t> RefCount = 1;

ur_shared_mutex Mutex;
};

ur_result_t EnqueueMemCopyRectHelper(
ur_queue_handle_t Queue, char *pSrc, char *pDst, ur_rect_offset_t SrcOffset,
ur_rect_offset_t DstOffset, ur_rect_region_t Region, size_t SrcRowPitch,
size_t SrcSlicePitch, size_t DstRowPitch, size_t DstSlicePitch,
bool Blocking, uint32_t NumEventsInWaitList,
const ur_event_handle_t *EventWaitList, ur_event_handle_t *Event);

} // namespace ur_sanitizer_layer
44 changes: 44 additions & 0 deletions source/loader/layers/sanitizer/asan_interceptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,9 @@ ur_result_t SanitizerInterceptor::allocateMemory(
} else if (Type == AllocType::SHARED_USM) {
UR_CALL(context.urDdiTable.USM.pfnSharedAlloc(
Context, Device, Properties, Pool, NeededSize, &Allocated));
} else if (Type == AllocType::MEM_BUFFER) {
UR_CALL(context.urDdiTable.USM.pfnDeviceAlloc(
Context, Device, Properties, Pool, NeededSize, &Allocated));
} else {
context.logger.error("Unsupport memory type");
return UR_RESULT_ERROR_INVALID_ARGUMENT;
Expand Down Expand Up @@ -662,13 +665,54 @@ ur_result_t SanitizerInterceptor::eraseKernel(ur_kernel_handle_t Kernel) {
return UR_RESULT_SUCCESS;
}

ur_result_t
SanitizerInterceptor::insertMemBuffer(std::shared_ptr<MemBuffer> MemBuffer) {
std::scoped_lock<ur_shared_mutex> Guard(m_MemBufferMapMutex);
assert(m_MemBufferMap.find(ur_cast<ur_mem_handle_t>(MemBuffer.get())) ==
m_MemBufferMap.end());
m_MemBufferMap.emplace(reinterpret_cast<ur_mem_handle_t>(MemBuffer.get()),
MemBuffer);
return UR_RESULT_SUCCESS;
}

ur_result_t SanitizerInterceptor::eraseMemBuffer(ur_mem_handle_t MemHandle) {
std::scoped_lock<ur_shared_mutex> Guard(m_MemBufferMapMutex);
assert(m_MemBufferMap.find(MemHandle) != m_MemBufferMap.end());
m_MemBufferMap.erase(MemHandle);
return UR_RESULT_SUCCESS;
}

std::shared_ptr<MemBuffer>
SanitizerInterceptor::getMemBuffer(ur_mem_handle_t MemHandle) {
std::shared_lock<ur_shared_mutex> Guard(m_MemBufferMapMutex);
if (m_MemBufferMap.find(MemHandle) != m_MemBufferMap.end()) {
return m_MemBufferMap[MemHandle];
}
return nullptr;
}

ur_result_t SanitizerInterceptor::prepareLaunch(
ur_context_handle_t Context, std::shared_ptr<DeviceInfo> &DeviceInfo,
ur_queue_handle_t Queue, ur_kernel_handle_t Kernel,
USMLaunchInfo &LaunchInfo) {
auto Program = GetProgram(Kernel);

do {
// Set membuffer arguments
auto KernelInfo = getKernelInfo(Kernel);
for (const auto &[ArgIndex, MemBuffer] : KernelInfo->BufferArgs) {
char *ArgPointer = nullptr;
UR_CALL(MemBuffer->getHandle(DeviceInfo->Handle, ArgPointer));
ur_result_t URes = context.urDdiTable.Kernel.pfnSetArgPointer(
Kernel, ArgIndex, nullptr, &ArgPointer);
if (URes != UR_RESULT_SUCCESS) {
context.logger.error(
"Failed to set buffer {} as the {} arg to kernel {}: {}",
ur_cast<ur_mem_handle_t>(MemBuffer.get()), ArgIndex, Kernel,
URes);
}
}

// Set launch info argument
auto ArgNums = GetKernelNumArgs(Kernel);
if (ArgNums) {
Expand Down
13 changes: 12 additions & 1 deletion source/loader/layers/sanitizer/asan_interceptor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#pragma once

#include "asan_allocator.hpp"
#include "asan_buffer.hpp"
#include "asan_libdevice.hpp"
#include "common.hpp"
#include "ur_sanitizer_layer.hpp"
Expand Down Expand Up @@ -81,8 +82,10 @@ struct QueueInfo {

struct KernelInfo {
ur_kernel_handle_t Handle;

ur_shared_mutex Mutex;
std::atomic<int32_t> RefCount = 1;
std::unordered_map<uint32_t, std::shared_ptr<MemBuffer>> BufferArgs;

// Need preserve the order of local arguments
std::map<uint32_t, LocalArgsInfo> LocalArgs;

Expand Down Expand Up @@ -194,6 +197,10 @@ class SanitizerInterceptor {
ur_result_t insertKernel(ur_kernel_handle_t Kernel);
ur_result_t eraseKernel(ur_kernel_handle_t Kernel);

ur_result_t insertMemBuffer(std::shared_ptr<MemBuffer> MemBuffer);
ur_result_t eraseMemBuffer(ur_mem_handle_t MemHandle);
std::shared_ptr<MemBuffer> getMemBuffer(ur_mem_handle_t MemHandle);

std::optional<AllocationIterator> findAllocInfoByAddress(uptr Address);

std::shared_ptr<ContextInfo> getContextInfo(ur_context_handle_t Context) {
Expand Down Expand Up @@ -245,6 +252,10 @@ class SanitizerInterceptor {
m_KernelMap;
ur_shared_mutex m_KernelMapMutex;

std::unordered_map<ur_mem_handle_t, std::shared_ptr<MemBuffer>>
m_MemBufferMap;
ur_shared_mutex m_MemBufferMapMutex;

/// Assumption: all USM chunks are allocated in one VA
AllocationMap m_AllocationMap;
ur_shared_mutex m_AllocationMapMutex;
Expand Down
Loading