Skip to content
This repository has been archived by the owner on Apr 23, 2020. It is now read-only.

Commit

Permalink
[OpenCL] Add '-cl-uniform-work-group-size' compile option
Browse files Browse the repository at this point in the history
Summary:
OpenCL 2.0 specification defines '-cl-uniform-work-group-size' option,
which requires that the global work-size be a multiple of the work-group
size specified to clEnqueueNDRangeKernel and allows optimizations that
are made possible by this restriction.

The patch introduces the support of this option.

To keep information about whether an OpenCL kernel has uniform work
group size or not, clang generates 'uniform-work-group-size' function
attribute for every kernel:
- "uniform-work-group-size"="true" for OpenCL 1.2 and lower,
- "uniform-work-group-size"="true" for OpenCL 2.0 and higher if
 '-cl-uniform-work-group-size' option was specified,
- "uniform-work-group-size"="false" for OpenCL 2.0 and higher if no
 '-cl-uniform-work-group-size' options was specified.

If the function is not an OpenCL kernel, 'uniform-work-group-size'
attribute isn't generated.

Patch by: krisb

Reviewers: yaxunl, Anastasia, b-sumner

Reviewed By: yaxunl, Anastasia

Subscribers: nhaehnle, yaxunl, Anastasia, cfe-commits

Differential Revision: https://reviews.llvm.org/D43570


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@325771 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
AlexeySotkin committed Feb 22, 2018
1 parent 0972dc7 commit 9acbe6c
Show file tree
Hide file tree
Showing 9 changed files with 43 additions and 3 deletions.
2 changes: 2 additions & 0 deletions include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,8 @@ def cl_denorms_are_zero : Flag<["-"], "cl-denorms-are-zero">, Group<opencl_Group
HelpText<"OpenCL only. Allow denormals to be flushed to zero.">;
def cl_fp32_correctly_rounded_divide_sqrt : Flag<["-"], "cl-fp32-correctly-rounded-divide-sqrt">, Group<opencl_Group>, Flags<[CC1Option]>,
HelpText<"OpenCL only. Specify that single precision floating-point divide and sqrt used in the program source are correctly rounded.">;
def cl_uniform_work_group_size : Flag<["-"], "cl-uniform-work-group-size">, Group<opencl_Group>, Flags<[CC1Option]>,
HelpText<"OpenCL only. Defines that the global work-size be a multiple of the work-group size specified to clEnqueueNDRangeKernel">;
def client__name : JoinedOrSeparate<["-"], "client_name">;
def combine : Flag<["-", "--"], "combine">, Flags<[DriverOption, Unsupported]>;
def compatibility__version : JoinedOrSeparate<["-"], "compatibility_version">;
Expand Down
1 change: 1 addition & 0 deletions include/clang/Frontend/CodeGenOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ CODEGENOPT(NoTrappingMath , 1, 0) ///< Set when -fno-trapping-math is enabled
CODEGENOPT(NoNaNsFPMath , 1, 0) ///< Assume FP arguments, results not NaN.
CODEGENOPT(FlushDenorm , 1, 0) ///< Allow FP denorm numbers to be flushed to zero
CODEGENOPT(CorrectlyRoundedDivSqrt, 1, 0) ///< -cl-fp32-correctly-rounded-divide-sqrt
CODEGENOPT(UniformWGSize , 1, 0) ///< -cl-uniform-work-group-size
CODEGENOPT(NoZeroInitializedInBSS , 1, 0) ///< -fno-zero-initialized-in-bss.
/// \brief Method of Objective-C dispatch to use.
ENUM_CODEGENOPT(ObjCDispatchMethod, ObjCDispatchMethodKind, 2, Legacy)
Expand Down
15 changes: 15 additions & 0 deletions lib/CodeGen/CGCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1870,6 +1870,21 @@ void CodeGenModule::ConstructAttributeList(
}
}

if (TargetDecl && TargetDecl->hasAttr<OpenCLKernelAttr>()) {
if (getLangOpts().OpenCLVersion <= 120) {
// OpenCL v1.2 Work groups are always uniform
FuncAttrs.addAttribute("uniform-work-group-size", "true");
} else {
// OpenCL v2.0 Work groups may be whether uniform or not.
// '-cl-uniform-work-group-size' compile option gets a hint
// to the compiler that the global work-size be a multiple of
// the work-group size specified to clEnqueueNDRangeKernel
// (i.e. work groups are uniform).
FuncAttrs.addAttribute("uniform-work-group-size",
llvm::toStringRef(CodeGenOpts.UniformWGSize));
}
}

if (!AttrOnCallSite) {
bool DisableTailCalls =
CodeGenOpts.DisableTailCalls ||
Expand Down
1 change: 1 addition & 0 deletions lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2379,6 +2379,7 @@ static void RenderOpenCLOptions(const ArgList &Args, ArgStringList &CmdArgs) {
options::OPT_cl_no_signed_zeros,
options::OPT_cl_denorms_are_zero,
options::OPT_cl_fp32_correctly_rounded_divide_sqrt,
options::OPT_cl_uniform_work_group_size
};

if (Arg *A = Args.getLastArg(options::OPT_cl_std_EQ)) {
Expand Down
2 changes: 2 additions & 0 deletions lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,8 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
Opts.FlushDenorm = Args.hasArg(OPT_cl_denorms_are_zero);
Opts.CorrectlyRoundedDivSqrt =
Args.hasArg(OPT_cl_fp32_correctly_rounded_divide_sqrt);
Opts.UniformWGSize =
Args.hasArg(OPT_cl_uniform_work_group_size);
Opts.Reciprocals = Args.getAllArgValues(OPT_mrecip_EQ);
Opts.ReciprocalMath = Args.hasArg(OPT_freciprocal_math);
Opts.NoTrappingMath = Args.hasArg(OPT_fno_trapping_math);
Expand Down
2 changes: 1 addition & 1 deletion test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ struct_char_arr32 func_ret_struct_char_arr32()
return s;
}

// CHECK: define i32 @func_transparent_union_ret() local_unnamed_addr #0 {
// CHECK: define i32 @func_transparent_union_ret() local_unnamed_addr #1 {
// CHECK: ret i32 0
transparent_u func_transparent_union_ret()
{
Expand Down
16 changes: 16 additions & 0 deletions test/CodeGenOpenCL/cl-uniform-wg-size.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM
// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM

kernel void ker() {};
// CHECK: define{{.*}}@ker() #0

void foo() {};
// CHECK: define{{.*}}@foo() #1

// CHECK-LABEL: attributes #0
// CHECK-UNIFORM: "uniform-work-group-size"="true"
// CHECK-NONUNIFORM: "uniform-work-group-size"="false"

// CHECK-LABEL: attributes #1
// CHECK-NOT: uniform-work-group-size
5 changes: 3 additions & 2 deletions test/CodeGenOpenCL/convergent.cl
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ void test_not_unroll() {
// CHECK: declare spir_func void @nodupfun(){{[^#]*}} #[[attr3:[0-9]+]]

// CHECK-LABEL: @assume_convergent_asm
// CHECK: tail call void asm sideeffect "s_barrier", ""() #4
// CHECK: tail call void asm sideeffect "s_barrier", ""() #5
kernel void assume_convergent_asm()
{
__asm__ volatile("s_barrier");
Expand All @@ -138,4 +138,5 @@ kernel void assume_convergent_asm()
// CHECK: attributes #2 = { {{[^}]*}}convergent{{[^}]*}} }
// CHECK: attributes #3 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
// CHECK: attributes #4 = { {{[^}]*}}convergent{{[^}]*}} }
// CHECK: attributes #5 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
// CHECK: attributes #5 = { {{[^}]*}}convergent{{[^}]*}} }
// CHECK: attributes #6 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
2 changes: 2 additions & 0 deletions test/Driver/opencl.cl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// RUN: %clang -S -### -cl-no-signed-zeros %s 2>&1 | FileCheck --check-prefix=CHECK-NO-SIGNED-ZEROS %s
// RUN: %clang -S -### -cl-denorms-are-zero %s 2>&1 | FileCheck --check-prefix=CHECK-DENORMS-ARE-ZERO %s
// RUN: %clang -S -### -cl-fp32-correctly-rounded-divide-sqrt %s 2>&1 | FileCheck --check-prefix=CHECK-ROUND-DIV %s
// RUN: %clang -S -### -cl-uniform-work-group-size %s 2>&1 | FileCheck --check-prefix=CHECK-UNIFORM-WG %s
// RUN: not %clang -cl-std=c99 -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-C99 %s
// RUN: not %clang -cl-std=invalid -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-INVALID %s

Expand All @@ -31,6 +32,7 @@
// CHECK-NO-SIGNED-ZEROS: "-cc1" {{.*}} "-cl-no-signed-zeros"
// CHECK-DENORMS-ARE-ZERO: "-cc1" {{.*}} "-cl-denorms-are-zero"
// CHECK-ROUND-DIV: "-cc1" {{.*}} "-cl-fp32-correctly-rounded-divide-sqrt"
// CHECK-UNIFORM-WG: "-cc1" {{.*}} "-cl-uniform-work-group-size"
// CHECK-C99: error: invalid value 'c99' in '-cl-std=c99'
// CHECK-INVALID: error: invalid value 'invalid' in '-cl-std=invalid'

Expand Down

0 comments on commit 9acbe6c

Please sign in to comment.