-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[AMDGPU][clang][CodeGen][opt] Add late-resolved feature identifying predicates #134016
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
91eeaf0
8bf1168
3421292
539c7e6
5926b9f
4381d93
d18f64e
7880ff4
719dfde
36b69b4
e327e15
d35efc5
5dee670
888a080
e35ac62
6c41ed2
8683148
a9b3e85
468a517
1b8b57e
18b4af2
a8bca2f
716cc1f
79035a9
6945c2e
9a7e250
0f04dbc
39a9d55
3fe116e
49c862a
f293f39
251476d
ebde49b
4bdd30e
a1b4a11
18841c1
76848d5
e1bfdf3
4f65468
e940d42
46adb74
ca9521d
11dd570
fab0d14
611ec0d
03b029f
32be1c0
012f74d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4966,6 +4966,118 @@ If no address spaces names are provided, all address spaces are fenced. | |
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local") | ||
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local", "global") | ||
|
||
__builtin_amdgcn_processor_is and __builtin_amdgcn_is_invocable | ||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
|
||
``__builtin_amdgcn_processor_is`` and ``__builtin_amdgcn_is_invocable`` provide | ||
a functional mechanism for programmatically querying: | ||
|
||
* the identity of the current target processor; | ||
* the capability of the current target processor to invoke a particular builtin. | ||
|
||
**Syntax**: | ||
|
||
.. code-block:: c | ||
|
||
__amdgpu_feature_predicate_t __builtin_amdgcn_processor_is(const char*); | ||
__amdgpu_feature_predicate_t __builtin_amdgcn_is_invocable(builtin_name); | ||
|
||
**Example of use**: | ||
|
||
.. code-block:: c++ | ||
|
||
if (__builtin_amdgcn_processor_is("gfx1201") || | ||
__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var)) | ||
__builtin_amdgcn_s_sleep_var(x); | ||
|
||
if (!__builtin_amdgcn_processor_is("gfx906")) | ||
__builtin_amdgcn_s_wait_event_export_ready(); | ||
else if (__builtin_amdgcn_processor_is("gfx1010") || | ||
__builtin_amdgcn_processor_is("gfx1101")) | ||
__builtin_amdgcn_s_ttracedata_imm(1); | ||
|
||
while (__builtin_amdgcn_processor_is("gfx1101")) *p += x; | ||
|
||
do { | ||
break; | ||
} while (__builtin_amdgcn_processor_is("gfx1010")); | ||
|
||
for (; __builtin_amdgcn_processor_is("gfx1201"); ++*p) break; | ||
|
||
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_wait_event_export_ready)) | ||
__builtin_amdgcn_s_wait_event_export_ready(); | ||
else if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_ttracedata_imm)) | ||
__builtin_amdgcn_s_ttracedata_imm(1); | ||
|
||
do { | ||
break; | ||
} while ( | ||
__builtin_amdgcn_is_invocable(__builtin_amdgcn_global_load_tr_b64_i32)); | ||
|
||
for (; __builtin_amdgcn_is_invocable(__builtin_amdgcn_permlane64); ++*p) | ||
break; | ||
|
||
**Description**: | ||
|
||
The builtins return a value of type ``__amdgpu_feature_predicate_t``, which is a | ||
target-specific type that behaves as if its C++ definition were the following:
Comment on lines
+5022
to
+5023
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this builtin work in C? If so, the docs should be updated to make it clear that this behavior applies to C as well as C++ and explain what it means in a bit more detail (presume that C users have no idea how C++ idioms work). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, it does work in C, although I'm not convinced it will see significant use. That being said, there's no good reason to make it C++ only. I have tried to add an explanation for that context / provide a fleshed out example, please do let me know if it's more or less aligned with what you had in mind. Thanks! |
||
|
||
.. code-block:: c++ | ||
|
||
struct __amdgpu_feature_predicate_t { | ||
__amdgpu_feature_predicate_t() = delete; | ||
__amdgpu_feature_predicate_t(const __amdgpu_feature_predicate_t&) = delete; | ||
__amdgpu_feature_predicate_t(__amdgpu_feature_predicate_t&&) = delete; | ||
|
||
explicit | ||
operator bool() const noexcept; | ||
}; | ||
|
||
The builtins can be used in C as well, wherein the | ||
``__amdgpu_feature_predicate_t`` type behaves as an opaque, forward declared | ||
type with conditional automated conversion to ``_Bool`` when used as the | ||
predicate argument to a control structure: | ||
|
||
.. code-block:: c | ||
|
||
struct __amdgpu_feature_predicate_t ret(); // Error | ||
void arg(struct __amdgpu_feature_predicate_t); // Error | ||
void local() { | ||
struct __amdgpu_feature_predicate_t x; // Error | ||
struct __amdgpu_feature_predicate_t y = | ||
__builtin_amdgcn_processor_is("gfx900"); // Error | ||
} | ||
void valid_use() { | ||
_Bool x = (_Bool)__builtin_amdgcn_processor_is("gfx900"); // OK | ||
if (__builtin_amdgcn_processor_is("gfx900")) // Implicit cast to _Bool | ||
return; | ||
for (; __builtin_amdgcn_processor_is("gfx900");) // Implicit cast to _Bool | ||
break; | ||
while (__builtin_amdgcn_processor_is("gfx900")) // Implicit cast to _Bool | ||
break; | ||
do { | ||
break; | ||
} while (__builtin_amdgcn_processor_is("gfx900")); // Implicit cast to _Bool | ||
|
||
__builtin_amdgcn_processor_is("gfx900") ? x : !x; | ||
} | ||
|
||
The boolean interpretation of the predicate values returned by the builtins: | ||
|
||
* for ``__builtin_amdgcn_processor_is``, indicates whether the current target | ||
matches the argument; the argument MUST be a string literal and a valid AMDGPU | ||
target | ||
* for ``__builtin_amdgcn_is_invocable``, indicates whether the builtin function | ||
passed as the argument can be invoked by the current target; the argument MUST | ||
be either a generic or AMDGPU specific builtin name | ||
|
||
When invoked while compiling for a concrete target, the builtins are evaluated | ||
early by Clang, and never produce any CodeGen effects / have no observable | ||
side-effects in IR. Conversely, when compiling for AMDGCN flavoured SPIR-V, | ||
which is an abstract target, a series of predicate values are implicitly | ||
created. These predicates get resolved when finalizing the compilation process | ||
for a concrete target, and shall reflect the latter's identity and features. | ||
Thus, it is possible to author high-level code, in e.g. HIP, that is target | ||
adaptive in a dynamic fashion, contrary to macro-based mechanisms. | ||
|
||
ARM/AArch64 Language Extensions | ||
------------------------------- | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -15,12 +15,16 @@ | |||||
|
||||||
#include "clang/AST/ASTFwd.h" | ||||||
#include "clang/Sema/SemaBase.h" | ||||||
#include "llvm/ADT/SmallPtrSet.h" | ||||||
|
||||||
namespace clang { | ||||||
class AttributeCommonInfo; | ||||||
class Expr; | ||||||
class ParsedAttr; | ||||||
|
||||||
class SemaAMDGPU : public SemaBase { | ||||||
llvm::SmallPtrSet<Expr *, 32> ExpandedPredicates; | ||||||
|
||||||
public: | ||||||
SemaAMDGPU(Sema &S); | ||||||
|
||||||
|
@@ -64,6 +68,11 @@ class SemaAMDGPU : public SemaBase { | |||||
void handleAMDGPUNumVGPRAttr(Decl *D, const ParsedAttr &AL); | ||||||
void handleAMDGPUMaxNumWorkGroupsAttr(Decl *D, const ParsedAttr &AL); | ||||||
void handleAMDGPUFlatWorkGroupSizeAttr(Decl *D, const ParsedAttr &AL); | ||||||
|
||||||
/// Expand a valid use of the feature identification builtins into its | ||||||
/// corresponding sequence of instructions. | ||||||
Expr *ExpandAMDGPUPredicateBI(CallExpr *CE); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
bool IsPredicate(Expr *E) const; | ||||||
}; | ||||||
} // namespace clang | ||||||
|
||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
same question about inf. loop here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think this'd ever be an infinite loop; it's either zero trips or one trip: if the predicate is true we just `break`. Otherwise, the discussion from the previous inf loop applies.