Skip to content

Commit 733d5e3

Browse files
authored
[SYCL] Implement SYCL-2020 reductions with read_write to reduction vars (#3315)
* [SYCL] Implement SYCL-2020 reductions with read_write access to reduction variable LIT tests: intel/llvm-test-suite#170 This patch: - adds SYCL-2020 property::reduction::initialize_to_identity - implements support for SYCL-2020 reductions for which initialize_to_identity is NOT used. Supported reductions correspond to read_write and USM ONEAPI::reductions. - sycl::reduction re-uses ONEAPI::reduction implementation/classes and automatically creates placeholder accessors for sycl::reduction called with sycl::buffer argument. - adds operator++ for reducer class defined in SYCL-2020 - fixes 2 errors in an ONEAPI::reduction used with placeholder accessors. The attribute "initialize_to_identity" is added but NOT supported yet. The multi-dimensional reductions are NOT supported yet. Reductions accepting sycl::span are not supported yet. Reductions cannot be yet used in parallel_for() accepting sycl::range Signed-off-by: Vyacheslav N Klochkov <vyacheslav.n.klochkov@intel.com>
1 parent cee7110 commit 733d5e3

File tree

9 files changed

+338
-54
lines changed

9 files changed

+338
-54
lines changed

sycl/include/CL/sycl.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
#include <CL/sycl/properties/all_properties.hpp>
4646
#include <CL/sycl/queue.hpp>
4747
#include <CL/sycl/range.hpp>
48+
#include <CL/sycl/reduction.hpp>
4849
#include <CL/sycl/sampler.hpp>
4950
#include <CL/sycl/stream.hpp>
5051
#include <CL/sycl/types.hpp>

sycl/include/CL/sycl/ONEAPI/reduction.hpp

Lines changed: 113 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,20 @@ class reducer {
221221

222222
T getIdentity() const { return MIdentity; }
223223

224+
template <typename _T = T>
225+
enable_if_t<IsReduPlus<_T, BinaryOperation>::value &&
226+
sycl::detail::is_geninteger<_T>::value>
227+
operator++() {
228+
combine(static_cast<T>(1));
229+
}
230+
231+
template <typename _T = T>
232+
enable_if_t<IsReduPlus<_T, BinaryOperation>::value &&
233+
sycl::detail::is_geninteger<_T>::value>
234+
operator++(int) {
235+
combine(static_cast<T>(1));
236+
}
237+
224238
template <typename _T = T>
225239
enable_if_t<IsReduPlus<_T, BinaryOperation>::value>
226240
operator+=(const _T &Partial) {
@@ -293,6 +307,20 @@ class reducer<T, BinaryOperation,
293307
return known_identity_impl<_BinaryOperation, _T>::value;
294308
}
295309

310+
template <typename _T = T>
311+
enable_if_t<IsReduPlus<_T, BinaryOperation>::value &&
312+
sycl::detail::is_geninteger<_T>::value>
313+
operator++() {
314+
combine(static_cast<T>(1));
315+
}
316+
317+
template <typename _T = T>
318+
enable_if_t<IsReduPlus<_T, BinaryOperation>::value &&
319+
sycl::detail::is_geninteger<_T>::value>
320+
operator++(int) {
321+
combine(static_cast<T>(1));
322+
}
323+
296324
template <typename _T = T>
297325
enable_if_t<IsReduPlus<_T, BinaryOperation>::value>
298326
operator+=(const _T &Partial) {
@@ -419,7 +447,7 @@ class reduction_impl : private reduction_impl_base {
419447
ONEAPI::accessor_property_list<>>;
420448
using rw_accessor_type =
421449
accessor<T, Dims, access::mode::read_write, access::target::global_buffer,
422-
IsPlaceholder, ONEAPI::accessor_property_list<>>;
450+
access::placeholder::false_t, ONEAPI::accessor_property_list<>>;
423451
static constexpr access::mode accessor_mode = AccMode;
424452
static constexpr int accessor_dim = Dims;
425453
static constexpr int buffer_dim = (Dims == 0) ? 1 : Dims;
@@ -455,6 +483,20 @@ class reduction_impl : private reduction_impl_base {
455483
return MIdentity;
456484
}
457485

486+
/// SYCL-2020.
487+
/// Constructs reduction_impl when the identity value is statically known.
488+
template <typename _T, typename AllocatorT,
489+
std::enable_if_t<IsKnownIdentityOp<_T, BinaryOperation>::value> * =
490+
nullptr>
491+
reduction_impl(buffer<_T, 1, AllocatorT> Buffer, handler &CGH)
492+
: MAcc(std::make_shared<accessor_type>(Buffer)),
493+
MIdentity(getIdentity()) {
494+
associateWithHandler(CGH);
495+
if (Buffer.get_count() != 1)
496+
throw runtime_error("Reduction variable must be a scalar.",
497+
PI_INVALID_VALUE);
498+
}
499+
458500
/// Constructs reduction_impl when the identity value is statically known.
459501
// Note that aliasing constructor was used to initialize MAcc to avoid
460502
// destruction of the object referenced by the parameter Acc.
@@ -465,8 +507,36 @@ class reduction_impl : private reduction_impl_base {
465507
: MAcc(shared_ptr_class<accessor_type>(shared_ptr_class<accessor_type>{},
466508
&Acc)),
467509
MIdentity(getIdentity()) {
468-
assert(Acc.get_count() == 1 &&
469-
"Only scalar/1-element reductions are supported now.");
510+
if (Acc.get_count() != 1)
511+
throw runtime_error("Reduction variable must be a scalar.",
512+
PI_INVALID_VALUE);
513+
}
514+
515+
/// SYCL-2020.
516+
/// Constructs reduction_impl when the identity value is statically known,
517+
/// and user still passed the identity value.
518+
template <
519+
typename _T, typename AllocatorT,
520+
enable_if_t<IsKnownIdentityOp<_T, BinaryOperation>::value> * = nullptr>
521+
reduction_impl(buffer<_T, 1, AllocatorT> Buffer, handler &CGH,
522+
const T & /*Identity*/, BinaryOperation)
523+
: MAcc(std::make_shared<accessor_type>(Buffer)),
524+
MIdentity(getIdentity()) {
525+
associateWithHandler(CGH);
526+
if (Buffer.get_count() != 1)
527+
throw runtime_error("Reduction variable must be a scalar.",
528+
PI_INVALID_VALUE);
529+
// For now the implementation ignores the identity value given by user
530+
// when the implementation knows the identity.
531+
// The SPEC could prohibit passing identity parameter to operations with
532+
// known identity, but that could have some bad consequences too.
533+
// For example, at some moment the implementation may NOT know the identity
534+
// for COMPLEX-PLUS reduction. User may create a program that would pass
535+
// COMPLEX value (0,0) as identity for PLUS reduction. At some later moment
536+
// when the implementation starts handling COMPLEX-PLUS as known operation
537+
// the existing user's program remains compilable and working correctly.
538+
// I.e. with this constructor here, adding more reduction operations to the
539+
// list of known operations does not break the existing programs.
470540
}
471541

472542
/// Constructs reduction_impl when the identity value is statically known,
@@ -476,13 +546,13 @@ class reduction_impl : private reduction_impl_base {
476546
template <
477547
typename _T = T, class _BinaryOperation = BinaryOperation,
478548
enable_if_t<IsKnownIdentityOp<_T, _BinaryOperation>::value> * = nullptr>
479-
reduction_impl(accessor_type &Acc, const T &Identity, BinaryOperation)
549+
reduction_impl(accessor_type &Acc, const T & /*Identity*/, BinaryOperation)
480550
: MAcc(shared_ptr_class<accessor_type>(shared_ptr_class<accessor_type>{},
481551
&Acc)),
482552
MIdentity(getIdentity()) {
483-
(void)Identity;
484-
assert(Acc.get_count() == 1 &&
485-
"Only scalar/1-element reductions are supported now.");
553+
if (Acc.get_count() != 1)
554+
throw runtime_error("Reduction variable must be a scalar.",
555+
PI_INVALID_VALUE);
486556
// For now the implementation ignores the identity value given by user
487557
// when the implementation knows the identity.
488558
// The SPEC could prohibit passing identity parameter to operations with
@@ -496,6 +566,21 @@ class reduction_impl : private reduction_impl_base {
496566
// list of known operations does not break the existing programs.
497567
}
498568

569+
/// SYCL-2020.
570+
/// Constructs reduction_impl when the identity value is NOT known statically.
571+
template <
572+
typename _T, typename AllocatorT,
573+
enable_if_t<!IsKnownIdentityOp<_T, BinaryOperation>::value> * = nullptr>
574+
reduction_impl(buffer<_T, 1, AllocatorT> Buffer, handler &CGH,
575+
const T &Identity, BinaryOperation BOp)
576+
: MAcc(std::make_shared<accessor_type>(Buffer)), MIdentity(Identity),
577+
MBinaryOp(BOp) {
578+
associateWithHandler(CGH);
579+
if (Buffer.get_count() != 1)
580+
throw runtime_error("Reduction variable must be a scalar.",
581+
PI_INVALID_VALUE);
582+
}
583+
499584
/// Constructs reduction_impl when the identity value is unknown.
500585
// Note that aliasing constructor was used to initialize MAcc to avoid
501586
// destruction of the object referenced by the parameter Acc.
@@ -506,8 +591,9 @@ class reduction_impl : private reduction_impl_base {
506591
: MAcc(shared_ptr_class<accessor_type>(shared_ptr_class<accessor_type>{},
507592
&Acc)),
508593
MIdentity(Identity), MBinaryOp(BOp) {
509-
assert(Acc.get_count() == 1 &&
510-
"Only scalar/1-element reductions are supported now.");
594+
if (Acc.get_count() != 1)
595+
throw runtime_error("Reduction variable must be a scalar.",
596+
PI_INVALID_VALUE);
511597
}
512598

513599
/// Constructs reduction_impl when the identity value is statically known.
@@ -587,15 +673,29 @@ class reduction_impl : private reduction_impl_base {
587673
}
588674

589675
/// Constructs a new temporary buffer to hold partial sums and returns
590-
/// the accessor that that buffer.
591-
template <bool IsOneWG>
592-
std::enable_if_t<!IsOneWG, accessor_type>
676+
/// the accessor for that buffer. Non-placeholder case.
677+
template <bool IsOneWG, access::placeholder _IsPlaceholder = IsPlaceholder>
678+
std::enable_if_t<!IsOneWG && _IsPlaceholder == access::placeholder::false_t,
679+
accessor_type>
593680
getWriteMemForPartialReds(size_t Size, handler &CGH) {
594681
MOutBufPtr = std::make_shared<buffer<T, buffer_dim>>(range<1>(Size));
595682
CGH.addReduction(MOutBufPtr);
596683
return accessor_type(*MOutBufPtr, CGH);
597684
}
598685

686+
/// Constructs a new temporary buffer to hold partial sums and returns
687+
/// the accessor for that buffer. Placeholder case.
688+
template <bool IsOneWG, access::placeholder _IsPlaceholder = IsPlaceholder>
689+
std::enable_if_t<!IsOneWG && _IsPlaceholder == access::placeholder::true_t,
690+
accessor_type>
691+
getWriteMemForPartialReds(size_t Size, handler &CGH) {
692+
MOutBufPtr = std::make_shared<buffer<T, buffer_dim>>(range<1>(Size));
693+
CGH.addReduction(MOutBufPtr);
694+
accessor_type Acc(*MOutBufPtr);
695+
CGH.require(Acc);
696+
return Acc;
697+
}
698+
599699
template <access::placeholder _IsPlaceholder = IsPlaceholder>
600700
enable_if_t<_IsPlaceholder == access::placeholder::false_t, accessor_type>
601701
getWriteAccForPartialReds(size_t Size, handler &CGH) {
@@ -624,8 +724,7 @@ class reduction_impl : private reduction_impl_base {
624724

625725
/// Creates 1-element global buffer initialized with identity value and
626726
/// returns an accessor to that buffer.
627-
accessor<T, Dims, access::mode::read_write, access::target::global_buffer>
628-
getReadWriteScalarAcc(handler &CGH) const {
727+
rw_accessor_type getReadWriteScalarAcc(handler &CGH) const {
629728
auto RWReduVal = std::make_shared<T>(MIdentity);
630729
CGH.addReduction(RWReduVal);
631730
auto RWReduBuf =
@@ -1576,7 +1675,6 @@ template <typename T, class BinaryOperation, int Dims, access::mode AccMode,
15761675
detail::reduction_impl<T, BinaryOperation, Dims, false, AccMode, IsPH>
15771676
reduction(accessor<T, Dims, AccMode, access::target::global_buffer, IsPH> &Acc,
15781677
const T &Identity, BinaryOperation BOp) {
1579-
// The Combiner argument was needed only to define the BinaryOperation param.
15801678
return detail::reduction_impl<T, BinaryOperation, Dims, false, AccMode, IsPH>(
15811679
Acc, Identity, BOp);
15821680
}
@@ -1592,7 +1690,6 @@ std::enable_if_t<
15921690
detail::reduction_impl<T, BinaryOperation, Dims, false, AccMode, IsPH>>
15931691
reduction(accessor<T, Dims, AccMode, access::target::global_buffer, IsPH> &Acc,
15941692
BinaryOperation) {
1595-
// The Combiner argument was needed only to define the BinaryOperation param.
15961693
return detail::reduction_impl<T, BinaryOperation, Dims, false, AccMode, IsPH>(
15971694
Acc);
15981695
}
@@ -1643,29 +1740,5 @@ inline constexpr AccumulatorT known_identity_v =
16431740
known_identity<BinaryOperation, AccumulatorT>::value;
16441741
#endif
16451742
} // namespace ONEAPI
1646-
1647-
// Currently, the type traits defined below correspond to SYCL 1.2.1 ONEAPI
1648-
// reduction extension. That may be changed later when SYCL 2020 reductions
1649-
// are implemented.
1650-
template <typename BinaryOperation, typename AccumulatorT>
1651-
struct has_known_identity
1652-
: ONEAPI::has_known_identity<BinaryOperation, AccumulatorT> {};
1653-
1654-
#if __cplusplus >= 201703L
1655-
template <typename BinaryOperation, typename AccumulatorT>
1656-
inline constexpr bool has_known_identity_v =
1657-
has_known_identity<BinaryOperation, AccumulatorT>::value;
1658-
#endif
1659-
1660-
template <typename BinaryOperation, typename AccumulatorT>
1661-
struct known_identity : ONEAPI::known_identity<BinaryOperation, AccumulatorT> {
1662-
};
1663-
1664-
#if __cplusplus >= 201703L
1665-
template <typename BinaryOperation, typename AccumulatorT>
1666-
inline constexpr AccumulatorT known_identity_v =
1667-
known_identity<BinaryOperation, AccumulatorT>::value;
1668-
#endif
1669-
16701743
} // namespace sycl
16711744
} // __SYCL_INLINE_NAMESPACE(cl)

sycl/include/CL/sycl/detail/properties_traits.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ __SYCL_PARAM_TRAITS_SPEC(sycl::ext::oneapi::property::buffer::use_pinned_host_me
88
__SYCL_PARAM_TRAITS_SPEC(sycl::property::noinit)
99
__SYCL_PARAM_TRAITS_SPEC(sycl::property::context::cuda::use_primary_context)
1010
__SYCL_PARAM_TRAITS_SPEC(sycl::property::queue::in_order)
11+
__SYCL_PARAM_TRAITS_SPEC(sycl::property::reduction::initialize_to_identity)

sycl/include/CL/sycl/detail/property_helper.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ enum DataLessPropKind {
3131
NoInit = 4,
3232
BufferUsePinnedHostMemory = 5,
3333
UsePrimaryContext = 6,
34-
DataLessPropKindSize = 7
34+
DataLessPropKindSize = 7,
35+
InitializeToIdentity = 8
3536
};
3637

3738
// List of all properties with data IDs

sycl/include/CL/sycl/properties/all_properties.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@
33
#include <CL/sycl/properties/context_properties.hpp>
44
#include <CL/sycl/properties/image_properties.hpp>
55
#include <CL/sycl/properties/queue_properties.hpp>
6+
#include <CL/sycl/properties/reduction_properties.hpp>
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
//==------- reduction_properties.hpp --- SYCL reduction properties ---------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include <CL/sycl/context.hpp>
12+
#include <CL/sycl/detail/property_helper.hpp>
13+
14+
__SYCL_INLINE_NAMESPACE(cl) {
15+
namespace sycl {
16+
namespace property {
17+
namespace reduction {
18+
class initialize_to_identity
19+
: public detail::DataLessProperty<detail::InitializeToIdentity> {};
20+
} // namespace reduction
21+
} // namespace property
22+
} // namespace sycl
23+
} // __SYCL_INLINE_NAMESPACE(cl)

0 commit comments

Comments
 (0)