Skip to content

[NFC][SYCL] Use context_impl & in sampler_impl ctor and near it #19153

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 13 additions & 10 deletions sycl/source/detail/sampler_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,14 @@ sampler_impl::sampler_impl(coordinate_normalization_mode normalizationMode,
verifyProps(MPropList);
}

sampler_impl::sampler_impl(cl_sampler clSampler,
const ContextImplPtr &syclContext) {
const AdapterPtr &Adapter = syclContext->getAdapter();
sampler_impl::sampler_impl(cl_sampler clSampler, context_impl &syclContext) {
const AdapterPtr &Adapter = syclContext.getAdapter();
ur_sampler_handle_t Sampler{};
Adapter->call<UrApiKind::urSamplerCreateWithNativeHandle>(
reinterpret_cast<ur_native_handle_t>(clSampler),
syclContext->getHandleRef(), nullptr, &Sampler);
syclContext.getHandleRef(), nullptr, &Sampler);

MContextToSampler[syclContext] = Sampler;
MContextToSampler[syclContext.shared_from_this()] = Sampler;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

std::unordered_map::operator[] was performing a copy internally before, now it can just move (and rvalue-ref overload is C++11, so available to us), so no significant (if at all) overhead here.

bool NormalizedCoords;

Adapter->call<UrApiKind::urSamplerGetInfo>(
Expand Down Expand Up @@ -95,10 +94,14 @@ sampler_impl::~sampler_impl() {
}

ur_sampler_handle_t
sampler_impl::getOrCreateSampler(const ContextImplPtr &ContextImpl) {
sampler_impl::getOrCreateSampler(context_impl &ContextImpl) {
// Just for the `MContextToSampler` lookups. Could probably be changed once we
// move to C++20 and would have heterogeneous lookup.
std::shared_ptr<context_impl> ContextImplPtr = ContextImpl.shared_from_this();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is extra overhead, but we have unconditional mutex lock below (and another one near return on non-exceptional code path), so shouldn't matter.


{
std::lock_guard<std::mutex> Lock(MMutex);
auto It = MContextToSampler.find(ContextImpl);
auto It = MContextToSampler.find(ContextImplPtr);
if (It != MContextToSampler.end())
return It->second;
}
Expand Down Expand Up @@ -135,18 +138,18 @@ sampler_impl::getOrCreateSampler(const ContextImplPtr &ContextImpl) {

ur_result_t errcode_ret = UR_RESULT_SUCCESS;
ur_sampler_handle_t resultSampler = nullptr;
const AdapterPtr &Adapter = ContextImpl->getAdapter();
const AdapterPtr &Adapter = ContextImpl.getAdapter();

errcode_ret = Adapter->call_nocheck<UrApiKind::urSamplerCreate>(
ContextImpl->getHandleRef(), &desc, &resultSampler);
ContextImpl.getHandleRef(), &desc, &resultSampler);

if (errcode_ret == UR_RESULT_ERROR_UNSUPPORTED_FEATURE)
throw sycl::exception(sycl::errc::feature_not_supported,
"Images are not supported by this device.");

Adapter->checkUrResult(errcode_ret);
std::lock_guard<std::mutex> Lock(MMutex);
MContextToSampler[ContextImpl] = resultSampler;
MContextToSampler[ContextImplPtr] = resultSampler;

return resultSampler;
}
Expand Down
8 changes: 4 additions & 4 deletions sycl/source/detail/sampler_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,23 +30,22 @@ enum class coordinate_normalization_mode : unsigned int;
namespace detail {

class context_impl;
using ContextImplPtr = std::shared_ptr<context_impl>;

class sampler_impl {
public:
sampler_impl(coordinate_normalization_mode normalizationMode,
addressing_mode addressingMode, filtering_mode filteringMode,
const property_list &propList);

sampler_impl(cl_sampler clSampler, const ContextImplPtr &syclContext);
sampler_impl(cl_sampler clSampler, context_impl &syclContext);

addressing_mode get_addressing_mode() const;

filtering_mode get_filtering_mode() const;

coordinate_normalization_mode get_coordinate_normalization_mode() const;

ur_sampler_handle_t getOrCreateSampler(const ContextImplPtr &ContextImpl);
ur_sampler_handle_t getOrCreateSampler(context_impl &ContextImpl);

~sampler_impl();

Expand All @@ -56,7 +55,8 @@ class sampler_impl {
/// Protects all the fields that can be changed by class' methods.
std::mutex MMutex;

std::unordered_map<ContextImplPtr, ur_sampler_handle_t> MContextToSampler;
std::unordered_map<std::shared_ptr<context_impl>, ur_sampler_handle_t>
MContextToSampler;

coordinate_normalization_mode MCoordNormMode;
addressing_mode MAddrMode;
Expand Down
13 changes: 6 additions & 7 deletions sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2313,8 +2313,7 @@ void SetArgBasedOnType(
const AdapterPtr &Adapter, ur_kernel_handle_t Kernel,
const std::shared_ptr<device_image_impl> &DeviceImageImpl,
const std::function<void *(Requirement *Req)> &getMemAllocationFunc,
const ContextImplPtr &ContextImpl, detail::ArgDesc &Arg,
size_t NextTrueIndex) {
context_impl &ContextImpl, detail::ArgDesc &Arg, size_t NextTrueIndex) {
switch (Arg.MType) {
case kernel_param_kind_t::kind_dynamic_work_group_memory:
break;
Expand Down Expand Up @@ -2442,7 +2441,7 @@ static ur_result_t SetKernelParamsAndLaunch(
auto setFunc = [&Adapter, Kernel, &DeviceImageImpl, &getMemAllocationFunc,
&Queue](detail::ArgDesc &Arg, size_t NextTrueIndex) {
SetArgBasedOnType(Adapter, Kernel, DeviceImageImpl, getMemAllocationFunc,
Queue.getContextImplPtr(), Arg, NextTrueIndex);
Queue.getContextImpl(), Arg, NextTrueIndex);
};
applyFuncOnFilteredArgs(EliminatedArgMask, Args, setFunc);
}
Expand Down Expand Up @@ -2530,7 +2529,7 @@ static ur_result_t SetKernelParamsAndLaunch(

static std::tuple<ur_kernel_handle_t, std::shared_ptr<device_image_impl>,
const KernelArgMask *>
getCGKernelInfo(const CGExecKernel &CommandGroup, ContextImplPtr ContextImpl,
getCGKernelInfo(const CGExecKernel &CommandGroup, context_impl &ContextImpl,
device_impl &DeviceImpl,
std::vector<FastKernelCacheValPtr> &KernelCacheValsToRelease) {

Expand All @@ -2552,7 +2551,7 @@ getCGKernelInfo(const CGExecKernel &CommandGroup, ContextImplPtr ContextImpl,
} else {
FastKernelCacheValPtr FastKernelCacheVal =
sycl::detail::ProgramManager::getInstance().getOrCreateKernel(
*ContextImpl, DeviceImpl, CommandGroup.MKernelName,
ContextImpl, DeviceImpl, CommandGroup.MKernelName,
CommandGroup.MKernelNameBasedCachePtr);
UrKernel = FastKernelCacheVal->MKernelHandle;
EliminatedArgMask = FastKernelCacheVal->MKernelArgMask;
Expand All @@ -2579,7 +2578,7 @@ ur_result_t enqueueImpCommandBufferKernel(
std::shared_ptr<device_image_impl> DeviceImageImpl = nullptr;
const KernelArgMask *EliminatedArgMask = nullptr;

auto ContextImpl = sycl::detail::getSyclObjImpl(Ctx);
context_impl &ContextImpl = *sycl::detail::getSyclObjImpl(Ctx);
std::tie(UrKernel, DeviceImageImpl, EliminatedArgMask) = getCGKernelInfo(
CommandGroup, ContextImpl, DeviceImpl, FastKernelCacheValsToRelease);

Expand All @@ -2599,7 +2598,7 @@ ur_result_t enqueueImpCommandBufferKernel(
AltUrKernels.push_back(AltUrKernel);
}

const sycl::detail::AdapterPtr &Adapter = ContextImpl->getAdapter();
const sycl::detail::AdapterPtr &Adapter = ContextImpl.getAdapter();
auto SetFunc = [&Adapter, &UrKernel, &DeviceImageImpl, &ContextImpl,
&getMemAllocationFunc](sycl::detail::ArgDesc &Arg,
size_t NextTrueIndex) {
Expand Down
4 changes: 1 addition & 3 deletions sycl/source/detail/scheduler/commands.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ class context_impl;
class DispatchHostTask;

using EventImplPtr = std::shared_ptr<detail::event_impl>;
using ContextImplPtr = std::shared_ptr<detail::context_impl>;
using StreamImplPtr = std::shared_ptr<detail::stream_impl>;

class Command;
Expand Down Expand Up @@ -749,8 +748,7 @@ void SetArgBasedOnType(
const detail::AdapterPtr &Adapter, ur_kernel_handle_t Kernel,
const std::shared_ptr<device_image_impl> &DeviceImageImpl,
const std::function<void *(Requirement *Req)> &getMemAllocationFunc,
const ContextImplPtr &ContextImpl, detail::ArgDesc &Arg,
size_t NextTrueIndex);
context_impl &ContextImpl, detail::ArgDesc &Arg, size_t NextTrueIndex);

template <typename FuncT>
void applyFuncOnFilteredArgs(const KernelArgMask *EliminatedArgMask,
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/sampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ sampler::sampler(coordinate_normalization_mode normalizationMode,

sampler::sampler(cl_sampler clSampler, const context &syclContext)
: impl(std::make_shared<detail::sampler_impl>(
clSampler, detail::getSyclObjImpl(syclContext))) {}
clSampler, *detail::getSyclObjImpl(syclContext))) {}

addressing_mode sampler::get_addressing_mode() const {
return impl->get_addressing_mode();
Expand Down