Skip to content

Commit e613fb3

Browse files
[SYCL] Rework 'half' implementation in order to remove bunch of workarounds (#1089)
Because of the fact, that `half` type is not a standard C++ type and it is not supported everywhere, its implementation differs between host and device: C++ class with overloaded arithmetic operators is used on host and `_Float16` is used on device side. Previously, the switch between two version was implemented as preprocessor macro and having two different types caused some problems with integration header and unnamed lambda feature, see #185 and #960. This patch redesigned `half` implementation in a way, that single wrapper data type is used as `half` representation on both host and device sides; differentiation between actual host and device implementations is done under the hood of this wrapper. Signed-off-by: Alexey Sachkov <alexey.sachkov@intel.com>
1 parent f39f47e commit e613fb3

File tree

15 files changed

+235
-239
lines changed

15 files changed

+235
-239
lines changed

clang/lib/AST/ItaniumMangle.cpp

Lines changed: 0 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -2456,67 +2456,6 @@ static bool isTypeSubstitutable(Qualifiers Quals, const Type *Ty,
24562456
return true;
24572457
}
24582458

2459-
namespace {
2460-
struct DeclContextDesc {
2461-
Decl::Kind DeclKind;
2462-
StringRef Name;
2463-
};
2464-
} // namespace
2465-
2466-
// For Scopes argument, the only supported Decl::Kind values are:
2467-
// - Namespace
2468-
// - CXXRecord
2469-
// - ClassTemplateSpecialization
2470-
static bool matchQualifiedTypeName(const QualType &Ty,
2471-
ArrayRef<DeclContextDesc> Scopes) {
2472-
// The idea: check the declaration context chain starting from the type
2473-
// itself. At each step check the context is of expected kind
2474-
// (namespace) and name.
2475-
const CXXRecordDecl *RecTy = Ty->getAsCXXRecordDecl();
2476-
2477-
if (!RecTy)
2478-
return false; // only classes/structs supported
2479-
const auto *Ctx = dyn_cast<DeclContext>(RecTy);
2480-
2481-
for (const auto &Scope : llvm::reverse(Scopes)) {
2482-
Decl::Kind DK = Ctx->getDeclKind();
2483-
StringRef Name = "";
2484-
2485-
if (DK != Scope.DeclKind)
2486-
return false;
2487-
2488-
switch (DK) {
2489-
case Decl::Kind::ClassTemplateSpecialization:
2490-
// ClassTemplateSpecializationDecl inherits from CXXRecordDecl
2491-
case Decl::Kind::CXXRecord:
2492-
Name = cast<CXXRecordDecl>(Ctx)->getName();
2493-
break;
2494-
case Decl::Kind::Namespace:
2495-
Name = cast<NamespaceDecl>(Ctx)->getName();
2496-
break;
2497-
default:
2498-
return false;
2499-
}
2500-
if (Name != Scope.Name)
2501-
return false;
2502-
Ctx = Ctx->getParent();
2503-
}
2504-
return Ctx->isTranslationUnit();
2505-
}
2506-
2507-
static bool isSYCLHostHalfType(const Type *Ty) {
2508-
// FIXME: this is not really portable, since the bunch of namespace below
2509-
// is not specified by the SYCL standard and highly depends on particular
2510-
// implementation
2511-
static const std::array<DeclContextDesc, 5> Scopes = {
2512-
DeclContextDesc{Decl::Kind::Namespace, "cl"},
2513-
DeclContextDesc{Decl::Kind::Namespace, "sycl"},
2514-
DeclContextDesc{Decl::Kind::Namespace, "detail"},
2515-
DeclContextDesc{Decl::Kind::Namespace, "half_impl"},
2516-
DeclContextDesc{Decl::Kind::CXXRecord, "half"}};
2517-
return matchQualifiedTypeName(QualType(Ty, 0), Scopes);
2518-
}
2519-
25202459
void CXXNameMangler::mangleType(QualType T) {
25212460
// If our type is instantiation-dependent but not dependent, we mangle
25222461
// it as it was written in the source, removing any top-level sugar.
@@ -2576,11 +2515,6 @@ void CXXNameMangler::mangleType(QualType T) {
25762515

25772516
bool isSubstitutable =
25782517
isTypeSubstitutable(quals, ty, Context.getASTContext());
2579-
if (Context.isUniqueNameMangler() && isSYCLHostHalfType(ty)) {
2580-
// Set isSubstitutable to false for cl::sycl::detail::half_impl::half
2581-
// to achieve the same mangling for other components
2582-
isSubstitutable = false;
2583-
}
25842518
if (isSubstitutable && mangleSubstitution(T))
25852519
return;
25862520

@@ -3057,11 +2991,6 @@ void CXXNameMangler::mangleType(const RecordType *T) {
30572991
mangleType(static_cast<const TagType*>(T));
30582992
}
30592993
void CXXNameMangler::mangleType(const TagType *T) {
3060-
if (Context.isUniqueNameMangler() && isSYCLHostHalfType(T)) {
3061-
// Mangle cl::sycl::detail::half_imple::half as _Float16
3062-
mangleType(Context.getASTContext().Float16Ty);
3063-
return;
3064-
}
30652994
mangleName(T->getDecl());
30662995
}
30672996

clang/test/CodeGenSYCL/half-with-unnamed-lambda.cpp

Lines changed: 0 additions & 68 deletions
This file was deleted.

sycl/include/CL/sycl/aliases.hpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,11 @@ class half;
2424
} // namespace sycl
2525
} // namespace cl
2626

27-
#ifdef __SYCL_DEVICE_ONLY__
28-
using half = _Float16;
29-
#else
27+
// FIXME: line below exports 'half' into global namespace, which seems incorrect
28+
// However, SYCL 1.2.1 spec considers 'half' to be a fundamental C++ data type
29+
// which doesn't exist within the 'cl::sycl' namespace.
30+
// Related spec issue: KhronosGroup/SYCL-Docs#40
3031
using half = cl::sycl::detail::half_impl::half;
31-
#endif
3232

3333
#define MAKE_VECTOR_ALIAS(ALIAS, TYPE, N) \
3434
using ALIAS##N = cl::sycl::vec<TYPE, N>;
@@ -80,7 +80,8 @@ using ulong = unsigned long;
8080
using longlong = long long;
8181
using ulonglong = unsigned long long;
8282
// TODO cl::sycl::half is not in SYCL specification, but is used by Khronos CTS.
83-
using half = half;
83+
// Related tests issue: KhronosGroup/SYCL-CTS#37
84+
using half = cl::sycl::detail::half_impl::half;
8485
using cl_bool = bool;
8586
using cl_char = std::int8_t;
8687
using cl_uchar = std::uint8_t;

sycl/include/CL/sycl/detail/boolean.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ template <int N> struct Boolean {
9999

100100
private:
101101
template <int Num> friend struct Assigner;
102-
alignas(VectorAlignment<bool, N>::value) DataType value;
102+
alignas(detail::vector_alignment<bool, N>::value) DataType value;
103103
};
104104

105105
template <> struct Boolean<1> {

sycl/include/CL/sycl/detail/generic_type_lists.hpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,20 @@
1111
#include <CL/sycl/access/access.hpp>
1212
#include <CL/sycl/detail/stl_type_traits.hpp>
1313
#include <CL/sycl/detail/type_list.hpp>
14-
#include <CL/sycl/half_type.hpp>
1514

1615
// Generic type name description, which serves as a description for all valid
1716
// types of parameters to kernel functions
1817

19-
// Forward declaration
18+
// Forward declarations
2019
__SYCL_INLINE namespace cl {
2120
namespace sycl {
2221
template <typename T, int N> class vec;
22+
namespace detail {
23+
namespace half_impl {
24+
class half;
25+
}
26+
} // namespace detail
27+
using half = detail::half_impl::half;
2328
} // namespace sycl
2429
} // namespace cl
2530

sycl/include/CL/sycl/detail/generic_type_traits.hpp

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,32 @@ using is_genptr = bool_constant<
232232

233233
template <typename T> using is_nan_type = is_contained<T, gtl::nan_list>;
234234

235+
// nan_types
236+
template <typename T, typename Enable = void> struct nan_types;
237+
238+
template <typename T>
239+
struct nan_types<
240+
T, enable_if_t<is_contained<T, gtl::unsigned_short_list>::value, T>> {
241+
using ret_type = change_base_type_t<T, half>;
242+
using arg_type = find_same_size_type_t<gtl::scalar_unsigned_short_list, half>;
243+
};
244+
245+
template <typename T>
246+
struct nan_types<
247+
T, enable_if_t<is_contained<T, gtl::unsigned_int_list>::value, T>> {
248+
using ret_type = change_base_type_t<T, float>;
249+
using arg_type = find_same_size_type_t<gtl::scalar_unsigned_int_list, float>;
250+
};
251+
252+
template <typename T>
253+
struct nan_types<
254+
T,
255+
enable_if_t<is_contained<T, gtl::unsigned_long_integer_list>::value, T>> {
256+
using ret_type = change_base_type_t<T, double>;
257+
using arg_type =
258+
find_same_size_type_t<gtl::scalar_unsigned_long_integer_list, double>;
259+
};
260+
235261
template <typename T> using nan_return_t = typename nan_types<T, T>::ret_type;
236262

237263
template <typename T>
@@ -364,10 +390,14 @@ using select_cl_scalar_intergal_t =
364390
// select_cl_scalar_t picks corresponding cl_* type for input
365391
// scalar T or returns T if T is not scalar.
366392
template <typename T>
367-
using select_cl_scalar_t =
368-
conditional_t<std::is_integral<T>::value, select_cl_scalar_intergal_t<T>,
369-
conditional_t<std::is_floating_point<T>::value,
370-
select_cl_scalar_float_t<T>, T>>;
393+
using select_cl_scalar_t = conditional_t<
394+
std::is_integral<T>::value, select_cl_scalar_intergal_t<T>,
395+
conditional_t<
396+
std::is_floating_point<T>::value, select_cl_scalar_float_t<T>,
397+
// half is a special case: it is implemented differently on host and
398+
// device and therefore, might lower to different types
399+
conditional_t<std::is_same<T, half>::value,
400+
cl::sycl::detail::half_impl::BIsRepresentationT, T>>>;
371401

372402
// select_cl_vector_or_scalar does cl_* type selection for element type of
373403
// a vector type T and does scalar type substitution. If T is not
@@ -378,7 +408,13 @@ template <typename T>
378408
struct select_cl_vector_or_scalar<
379409
T, typename std::enable_if<is_vgentype<T>::value>::type> {
380410
using type =
381-
vec<select_cl_scalar_t<typename T::element_type>, T::get_count()>;
411+
// select_cl_scalar_t returns _Float16, so, we try to instantiate vec
412+
// class with _Float16 DataType, which is not expected there
413+
// So, leave vector<half, N> as-is
414+
vec<conditional_t<std::is_same<typename T::element_type, half>::value,
415+
typename T::element_type,
416+
select_cl_scalar_t<typename T::element_type>>,
417+
T::get_count()>;
382418
};
383419

384420
template <typename T>

sycl/include/CL/sycl/detail/kernel_desc.hpp

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,22 +15,10 @@ __SYCL_INLINE namespace cl {
1515
namespace sycl {
1616
namespace detail {
1717

18-
namespace half_impl {
19-
20-
class half;
21-
// Half type is defined as _Float16 on device and as manually implemented half
22-
// type on host. Integration header is generated by device compiler so it sees
23-
// half type as _Float16 and it will add _Float16 to integration header if it
24-
// is used in kernel name template parameters. To avoid errors in host
25-
// compilation we remove _Float16 from integration header using following macro.
26-
// Same thing goes about bool type which is defined as _Bool.
2718
#ifndef __SYCL_DEVICE_ONLY__
28-
#define _Float16 cl::sycl::detail::half_impl::half
2919
#define _Bool bool
3020
#endif
3121

32-
} // namespace half_impl
33-
3422
// kernel parameter kinds
3523
enum class kernel_param_kind_t {
3624
kind_accessor,

sycl/include/CL/sycl/detail/type_traits.hpp

Lines changed: 16 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,17 @@
1212
#include <CL/sycl/detail/generic_type_lists.hpp>
1313
#include <CL/sycl/detail/stl_type_traits.hpp>
1414
#include <CL/sycl/detail/type_list.hpp>
15-
#include <CL/sycl/half_type.hpp>
1615

1716
#include <type_traits>
1817

1918
__SYCL_INLINE namespace cl {
2019
namespace sycl {
20+
namespace detail {
21+
namespace half_impl {
22+
class half;
23+
}
24+
} // namespace detail
25+
using half = detail::half_impl::half;
2126

2227
// Forward declaration
2328
template <typename ElementType, access::address_space Space> class multi_ptr;
@@ -38,6 +43,16 @@ struct vector_size_impl<vec<T, N>> : int_constant<N> {};
3843
template <typename T>
3944
struct vector_size : vector_size_impl<remove_cv_t<remove_reference_t<T>>> {};
4045

46+
// 4.10.2.6 Memory layout and alignment
47+
template <typename T, int N>
48+
struct vector_alignment_impl
49+
: conditional_t<N == 3, int_constant<sizeof(T) * 4>,
50+
int_constant<sizeof(T) * N>> {};
51+
52+
template <typename T, int N>
53+
struct vector_alignment
54+
: vector_alignment_impl<remove_cv_t<remove_reference_t<T>>, N> {};
55+
4156
// vector_element
4257
template <typename T> struct vector_element_impl;
4358
template <typename T>
@@ -233,32 +248,6 @@ template <typename T, int N, typename TL> struct make_type_impl<vec<T, N>, TL> {
233248
template <typename T, typename TL>
234249
using make_type_t = typename make_type_impl<T, TL>::type;
235250

236-
// nan_types
237-
template <typename T, typename Enable = void> struct nan_types;
238-
239-
template <typename T>
240-
struct nan_types<
241-
T, enable_if_t<is_contained<T, gtl::unsigned_short_list>::value, T>> {
242-
using ret_type = change_base_type_t<T, half>;
243-
using arg_type = find_same_size_type_t<gtl::scalar_unsigned_short_list, half>;
244-
};
245-
246-
template <typename T>
247-
struct nan_types<
248-
T, enable_if_t<is_contained<T, gtl::unsigned_int_list>::value, T>> {
249-
using ret_type = change_base_type_t<T, float>;
250-
using arg_type = find_same_size_type_t<gtl::scalar_unsigned_int_list, float>;
251-
};
252-
253-
template <typename T>
254-
struct nan_types<
255-
T,
256-
enable_if_t<is_contained<T, gtl::unsigned_long_integer_list>::value, T>> {
257-
using ret_type = change_base_type_t<T, double>;
258-
using arg_type =
259-
find_same_size_type_t<gtl::scalar_unsigned_long_integer_list, double>;
260-
};
261-
262251
// make_larger_t
263252
template <typename T, typename Enable = void> struct make_larger_impl;
264253
template <typename T>

0 commit comments

Comments
 (0)