Clang CUDA functions not handling concepts correctly #67507
Comments
That's an interesting problem.
The key here is: if you were to make the function …
I was able to find some workarounds, but the behavior definitely doesn't seem correct. I suspect concept satisfiability is cached: if I call a host function with the same signature and then call the GPU function, it works as expected. I don't think concept satisfiability should depend on whether the functions have device or host markups. You may never invoke the associated functions in device code, but they should still be usable to check the concept requirements.
Also, nvcc compiles the example fine.
Reduced version:
As I suggested above, I believe clang is correct to error out here, because the kernel, which must satisfy the requirement, is not allowed to call a host-only function. The main issue is that it's not clear how concepts are supposed to interact with target-based function overloading. This is largely unexplored territory. Collecting corner cases like these, along with their workarounds, would be a useful starting point for assessing what the issues are and how to deal with them.
This cannot be correct. If I call …
Remove 1 and you get the compilation error. Even if your argument is that clang is correct to error (which I very much don't think it is), it should at least error consistently.
Concept satisfiability should be completely separate from whether a function is host, device, etc. This is what nvcc appears to do. And a concept can have requirements that are never invoked from device code.
As I said, it's unexplored territory. I do agree that the compiler should behave consistently. Clang not complaining in the second case looks like a bug to me, though, not a reason to conclude that the original error is wrong.
The 'requires' will be satisfied for the host function, but not for the GPU kernel. Granted, my familiarity with concepts is rather rudimentary and it's likely that I'm misunderstanding something. If I'm wrong here, please tell me why you would expect the example above, as written, to compile on the GPU.
NVCC not reporting an error may be a bug, too. I do not see how that concept can be satisfied for the GPU kernel, when …
Concepts can be high-level and bundle a lot of behavior. You might have a generic concept such as geometric_shape, for example, that builds on lower-level concept primitives and requires a type to implement hundreds of different functions. Perhaps only a few would be relevant to GPU code. Nonetheless, you may want to express generic APIs in terms of geometric_shape, since a collection of known behavior can be simpler and more maintainable than going through individual generic functions and determining their minimal requirements.
That's all good. But how do we reconcile it with the fact that, with heterogeneous computing in the picture, some of the assumptions concepts may be implicitly relying on are no longer true? As I said, it's unexplored territory. Your expectation appears to be that concepts written for host-only code should magically work for GPU code, too, and I don't think that's always doable, at least for concepts that depend on the callability of some functions. It would work for the subset of functions that are callable on both host and device; that would be mostly …
No, I would expect an error if they get invoked from device code, but no error otherwise. For example, geometric_shape might require a function compute_area. You might use that function to compute the areas of a large array of objects on the GPU. But it may also require a function draw_object that interacts with display drivers and makes no sense to invoke from device code. I would expect geometric_shape to be satisfied if the functions are implemented, and a compilation error only if you try to invoke compute_area from device code and it doesn't have …
Can you give me a specific example? Here's a compiler explorer page with both host- and device-side CUDA compilation that you can use as a starting point: https://cuda.godbolt.org/z/4rjda6r6T . Please fill in an example of the scenario you've outlined above, with the expectations you would have for the code.
Here's a sketch of what should work
Surprisingly enough, it's …
What are the expectations enforced by the concept?
What would a concept look like for the following conditions:
Yes, that's what it's expressing. For example, you could also write it as …
I'm not expecting host/device to factor into whether a concept is satisfied. So, if you call …
I'd expect not to get a substitution failure, but still to get an error equivalent to calling the function with no concept constraint.
Perhaps it might be useful to consider extending the concept syntax to express host/device-specific requirements, but I imagine the most basic version would ignore …
If we allow a host function to fulfill a requirement of a device function, we may end up with fake satisfaction of a real requirement, which defeats the purpose of concepts. A better solution might be to use different concepts for host and device templates: the concept for the host side contains only host-side requirements, and the concept for the device side contains only device-side requirements.
However, I think we may need to check concept requirements in the host context by default, even when the check is done for a device template. This keeps us compatible with C++ constexpr templates and with nvcc. The rationale is that users usually have different requirements on the host and device sides; the requirements in a standard C++ concept are for the host side by default, and it may not be the user's intention to transfer them to the device side merely because the function names match. Doing this keeps compatibility with C++ and nvcc, but loses the ability to express device-side requirements. I think that is acceptable as a baseline. Then we can consider introducing a new extension for expressing device-side requirements, and use conditional compilation (e.g. #if __has_extension) to enable those device-side requirements for clang only.
Currently, constraints are checked in Sema::FinishTemplateArgumentDeduction, where the current function in ASTContext is set to the instantiated template function. When resolving functions for the constraints, clang assumes the caller is the current function; that's why host functions fail candidacy in template kernels. Clang also caches the constraint-checking result per concept/type pair: https://github.com/llvm/llvm-project/blob/main/clang/lib/Sema/SemaConcept.cpp#L502 . It assumes the result does not depend on the instantiation context; that's why, after a successful constraint check for a host-function instantiation, it does not repeat the check for the kernel instantiation. I think the fix should be to give constraint checking its own host/device context, defaulting to host for compatibility with standard C++. This makes constraint checking independent of the caller and makes the caching valid. Later on, we may introduce device constraints by other means, e.g. adding a __device__ attribute per function call in constraints.
Currently, constraints are checked in Sema::FinishTemplateArgumentDeduction, where the current function in ASTContext is set to the instantiated template function. When resolving functions for the constraints, clang assumes the caller is the current function. This causes incompatibility with nvcc, and with standard C++ for constexpr template functions. Clang caches the constraint-checking result per concept/type pair, assuming the result does not depend on the instantiation context. This patch lets constraint checking have its own host/device context, defaulting to host for compatibility with C++. This makes constraint checking independent of the caller and makes the caching valid. In the future, we may introduce device constraints by other means, e.g. adding a __device__ attribute per function call in constraints. Fixes: llvm#67507
@llvm/issue-subscribers-c-20
Take the example code posted below.
If I compile, I get this error
But the concept should be satisfied. In fact, if I uncomment the line
the compilation will succeed. Here is the version of clang I'm using
code:
@llvm/issue-subscribers-clang-frontend