Skip to content

[SYCL] Add new FPGA loop attribute enable_loop_pipelining #9263

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
May 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -2509,6 +2509,17 @@ def SYCLIntelMaxReinvocationDelay : StmtAttr {
def : MutualExclusions<[SYCLIntelDisableLoopPipelining,
SYCLIntelMaxReinvocationDelay]>;

// Statement attribute [[intel::enable_loop_pipelining]]. It has no arguments
// and is accepted on 'for', range-based 'for', 'while', and 'do' loops; any
// other subject is reported as an error.
def SYCLIntelEnableLoopPipelining : StmtAttr {
let Spellings = [CXX11<"intel", "enable_loop_pipelining">];
let Subjects = SubjectList<[ForStmt, CXXForRangeStmt, WhileStmt, DoStmt],
ErrorDiag, "'for', 'while', and 'do' statements">;
// NOTE(review): SilentlyIgnoreSYCLIsHost presumably makes host compilation
// drop the attribute without a diagnostic - confirm against its definition.
let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost];
let Documentation = [SYCLIntelEnableLoopPipeliningAttrDocs];
}

// disable_loop_pipelining and enable_loop_pipelining are mutually exclusive:
// applying both to the same loop is diagnosed as incompatible attributes.
def : MutualExclusions<[SYCLIntelDisableLoopPipelining,
SYCLIntelEnableLoopPipelining]>;

def SYCLIntelLocalNonConstVar : SubsetSubject<Var,
[{S->hasLocalStorage() &&
S->getKind() != Decl::ImplicitParam &&
Expand Down
39 changes: 38 additions & 1 deletion clang/include/clang/Basic/AttrDocs.td
Original file line number Diff line number Diff line change
Expand Up @@ -3334,7 +3334,7 @@ disables pipelining of the loop or function data path, causing the loop
or function to be executed serially. Cannot be used on the same loop or
function, or in conjunction with ``max_interleaving``,
``speculated_iterations``, ``max_concurrency``, ``initiation_interval``,
``ivdep``, or ``max_reinvocation_delay``.
``ivdep``, ``max_reinvocation_delay``, or ``enable_loop_pipelining`` attributes.

.. code-block:: c++

Expand Down Expand Up @@ -3492,6 +3492,43 @@ conjunction with disable_loop_pipelining.
}];
}

// User-facing documentation for the [[intel::enable_loop_pipelining]] loop
// attribute. The examples are indented so that they render as the body of the
// reStructuredText code-block directive.
def SYCLIntelEnableLoopPipeliningAttrDocs : Documentation {
  let Category = DocCatVariable;
  let Heading = "intel::enable_loop_pipelining";
  let Content = [{
The ``enable_loop_pipelining`` attribute applies to a loop in SYCL device code.
It takes no arguments and enables pipelining of the loop. This attribute is
useful in the low-area flow, in which all loops are unpipelined by default. It
cannot be used on the same loop in conjunction with the
``disable_loop_pipelining`` attribute.

.. code-block:: c++

  void bar() {
    int a[10];
    [[intel::enable_loop_pipelining]] for (int i = 0; i != 10; ++i) a[i] = 0;
  }

  void Array(int *array, size_t n) {
    [[intel::enable_loop_pipelining]] for (int i = 0; i < n; ++i) array[i] = 0;
  }

  void count () {
    int a1[10], i = 0;
    [[intel::enable_loop_pipelining]] while (i < 10) {
      a1[i++] = 3;
    }
  }

  void check() {
    int a = 10;
    [[intel::enable_loop_pipelining]] do {
      a = a + 1;
    } while (a < 20);
  }

  }];
}

def SYCLIntelLoopFuseDocs : Documentation {
let Category = DocCatFunction;
let Heading = "loop_fuse, loop_fuse_independent";
Expand Down
21 changes: 18 additions & 3 deletions clang/lib/CodeGen/CGLoopInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,15 @@ MDNode *LoopInfo::createMetadata(
LoopProperties.push_back(MDNode::get(Ctx, Vals));
}

// enable_loop_pipelining attribute corresponds to
// 'llvm.loop.intel.pipelining.enable, i32 1' metadata
if (Attrs.SYCLLoopPipeliningEnable) {
Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.intel.pipelining.enable"),
ConstantAsMetadata::get(
ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
LoopProperties.push_back(MDNode::get(Ctx, Vals));
}

LoopProperties.insert(LoopProperties.end(), AdditionalLoopProperties.begin(),
AdditionalLoopProperties.end());
return createFullUnrollMetadata(Attrs, LoopProperties, HasUserTransforms);
Expand All @@ -635,7 +644,7 @@ LoopAttributes::LoopAttributes(bool IsParallel)
VectorizeScalable(LoopAttributes::Unspecified), InterleaveCount(0),
SYCLIInterval(0), SYCLLoopCoalesceEnable(false),
SYCLLoopCoalesceNLevels(0), SYCLLoopPipeliningDisable(false),
UnrollCount(0), UnrollAndJamCount(0),
SYCLLoopPipeliningEnable(false), UnrollCount(0), UnrollAndJamCount(0),
DistributeEnable(LoopAttributes::Unspecified), PipelineDisabled(false),
PipelineInitiationInterval(0), SYCLNofusionEnable(false),
MustProgress(false) {}
Expand All @@ -656,6 +665,7 @@ void LoopAttributes::clear() {
SYCLSpeculatedIterationsNIterations.reset();
SYCLIntelFPGAVariantCount.clear();
SYCLMaxReinvocationDelayNCycles.reset();
SYCLLoopPipeliningEnable = false;
UnrollCount = 0;
UnrollAndJamCount = 0;
VectorizeEnable = LoopAttributes::Unspecified;
Expand Down Expand Up @@ -693,8 +703,8 @@ LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs,
!Attrs.SYCLSpeculatedIterationsNIterations &&
Attrs.SYCLIntelFPGAVariantCount.empty() && Attrs.UnrollCount == 0 &&
!Attrs.SYCLMaxReinvocationDelayNCycles &&
Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled &&
Attrs.PipelineInitiationInterval == 0 &&
!Attrs.SYCLLoopPipeliningEnable && Attrs.UnrollAndJamCount == 0 &&
!Attrs.PipelineDisabled && Attrs.PipelineInitiationInterval == 0 &&
Attrs.VectorizePredicateEnable == LoopAttributes::Unspecified &&
Attrs.VectorizeEnable == LoopAttributes::Unspecified &&
Attrs.UnrollEnable == LoopAttributes::Unspecified &&
Expand Down Expand Up @@ -1027,6 +1037,8 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
// For attribute max_reinvocation_delay:
// n - 'llvm.loop.intel.max_reinvocation_delay.count, i32 n' metadata will be
// emitted
// For attribute enable_loop_pipelining:
// 'llvm.loop.intel.pipelining.enable, i32 1' metadata will be emitted
for (const auto *A : Attrs) {
if (const auto *SYCLIntelIVDep = dyn_cast<SYCLIntelIVDepAttr>(A))
addSYCLIVDepInfo(Header->getContext(), SYCLIntelIVDep->getSafelenValue(),
Expand Down Expand Up @@ -1099,6 +1111,9 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
llvm::APSInt ArgVal = CE->getResultAsAPSInt();
setSYCLMaxReinvocationDelayNCycles(ArgVal.getSExtValue());
}

if (isa<SYCLIntelEnableLoopPipeliningAttr>(A))
setSYCLLoopPipeliningEnable();
}

setMustProgress(MustProgress);
Expand Down
8 changes: 8 additions & 0 deletions clang/lib/CodeGen/CGLoopInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,9 @@ struct LoopAttributes {
// Value for llvm.loop.intel.max_reinvocation_delay metadata.
llvm::Optional<unsigned> SYCLMaxReinvocationDelayNCycles;

/// Flag for llvm.loop.intel.pipelining.enable, i32 1 metadata.
bool SYCLLoopPipeliningEnable;

/// llvm.unroll.
unsigned UnrollCount;

Expand Down Expand Up @@ -418,6 +421,11 @@ class LoopInfoStack {
StagedAttrs.SYCLMaxReinvocationDelayNCycles = C;
}

/// Stage the enable_loop_pipelining flag so that it applies to the next loop
/// pushed. This setter only ever turns the flag on; it takes no argument and
/// cannot reset it.
void setSYCLLoopPipeliningEnable() {
StagedAttrs.SYCLLoopPipeliningEnable = true;
}

private:
/// Returns true if there is LoopInfo on the stack.
bool hasInfo() const { return !Active.empty(); }
Expand Down
9 changes: 9 additions & 0 deletions clang/lib/Sema/SemaStmtAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,11 @@ static Attr * handleSYCLIntelMaxReinvocationDelayAttr(Sema &S, Stmt *St,
return S.BuildSYCLIntelMaxReinvocationDelayAttr(A, E);
}

/// Creates the semantic attribute node for [[intel::enable_loop_pipelining]].
///
/// The attributed statement is not inspected (the parameter is unnamed); the
/// node is built directly from the parsed attribute \p A and allocated with
/// placement new on the Sema's context.
static Attr *handleSYCLIntelEnableLoopPipeliningAttr(Sema &S, Stmt *,
                                                     const ParsedAttr &A) {
  auto &Ctx = S.Context;
  return new (Ctx) SYCLIntelEnableLoopPipeliningAttr(Ctx, A);
}

static Attr *handleLoopHintAttr(Sema &S, Stmt *St, const ParsedAttr &A,
SourceRange) {
IdentifierLoc *PragmaNameLoc = A.getArgAsIdent(0);
Expand Down Expand Up @@ -895,6 +900,8 @@ static void CheckForIncompatibleSYCLLoopAttributes(
CheckForDuplicationSYCLLoopAttribute<SYCLIntelNofusionAttr>(S, Attrs);
CheckForDuplicationSYCLLoopAttribute<SYCLIntelMaxReinvocationDelayAttr>(
S, Attrs);
CheckForDuplicationSYCLLoopAttribute<SYCLIntelEnableLoopPipeliningAttr>(
S, Attrs);
}

void CheckForIncompatibleUnrollHintAttributes(
Expand Down Expand Up @@ -1042,6 +1049,8 @@ static Attr *ProcessStmtAttribute(Sema &S, Stmt *St, const ParsedAttr &A,
return handleIntelNofusionAttr(S, St, A);
case ParsedAttr::AT_SYCLIntelMaxReinvocationDelay:
return handleSYCLIntelMaxReinvocationDelayAttr(S, St, A);
case ParsedAttr::AT_SYCLIntelEnableLoopPipelining:
return handleSYCLIntelEnableLoopPipeliningAttr(S, St, A);
default:
// N.B., ClangAttrEmitter.cpp emits a diagnostic helper that ensures a
// declaration attribute is not written on a statement, but this code is
Expand Down
25 changes: 25 additions & 0 deletions clang/test/CodeGenSYCL/intel-fpga-loops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
// CHECK: br label %for.cond, !llvm.loop ![[MD_MRD:[0-9]+]]
// CHECK: br label %for.cond2, !llvm.loop ![[MD_MRD_2:[0-9]+]]
// CHECK: br label %for.cond13, !llvm.loop ![[MD_MRD_3:[0-9]+]]
// CHECK: br label %for.cond, !llvm.loop ![[MD_FP:[0-9]+]]
// CHECK: br label %while.cond, !llvm.loop ![[MD_FP_1:[0-9]+]]
// CHECK: br i1 %cmp5, label %do.body, label %do.end, !llvm.loop ![[MD_FP_2:[0-9]+]]

void disable_loop_pipelining() {
int a[10];
Expand Down Expand Up @@ -171,6 +174,27 @@ void max_reinvocation_delay() {
a[i] = 0;
}

// CodeGen tests for loop attribute [[intel::enable_loop_pipelining]].
// Each attributed loop below ('for', 'while', 'do') is expected to carry the
// 'llvm.loop.intel.pipelining.enable, i32 1' loop metadata.
void fpga_enable_loop_pipelining() {
int a[10];
// 'for' loop: the pipelining-enable metadata node is defined here.
// CHECK: ![[MD_FP]] = distinct !{![[MD_FP]], ![[MP]], ![[MD_fpga_pipeline:[0-9]+]]}
// CHECK-NEXT: ![[MD_fpga_pipeline]] = !{!"llvm.loop.intel.pipelining.enable", i32 1}
[[intel::enable_loop_pipelining]] for (int i = 0; i != 10; ++i)
a[i] = 0;

// 'while' loop: expected to reference the same pipelining-enable node.
// CHECK: ![[MD_FP_1]] = distinct !{![[MD_FP_1]], ![[MP]], ![[MD_fpga_pipeline]]}
int j = 0;
[[intel::enable_loop_pipelining]] while (j < 10) {
a[j++] = 3;
}

// 'do' loop: expected to reference the same pipelining-enable node.
// CHECK: ![[MD_FP_2]] = distinct !{![[MD_FP_2]], ![[MP]], ![[MD_fpga_pipeline]]}
int b = 10;
[[intel::enable_loop_pipelining]] do {
b = b + 1;
} while (b < 20);
}

template <typename name, typename Func>
__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) {
kernelFunc();
Expand All @@ -187,6 +211,7 @@ int main() {
speculated_iterations<4, 0>();
loop_count_control<12>();
max_reinvocation_delay<3, 1>();
fpga_enable_loop_pipelining();
});
return 0;
}
50 changes: 50 additions & 0 deletions clang/test/SemaSYCL/intel-fpga-enable-loop-pipelining-ast.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -Wno-sycl-2017-compat -ast-dump %s | FileCheck %s

// Add AST tests for Loop attribute: [[intel::enable_loop_pipelining]].

#include "sycl.hpp"

using namespace sycl;
queue q;

// Applies [[intel::enable_loop_pipelining]] to every supported loop form
// ('for', 'while', nested 'for', 'do', and range-based 'for'). For each one the
// AST dump is expected to contain an AttributedStmt immediately followed by a
// SYCLIntelEnableLoopPipeliningAttr node.
void fpga_enable_loop_pipelining() {
  int a1[10], a2[10];
  // CHECK: AttributedStmt
  // CHECK-NEXT: SYCLIntelEnableLoopPipeliningAttr
  [[intel::enable_loop_pipelining]] for (int p = 0; p < 10; ++p) {
    a1[p] = a2[p] = 0;
  }

  // CHECK: AttributedStmt
  // CHECK-NEXT: SYCLIntelEnableLoopPipeliningAttr
  int i = 0;
  [[intel::enable_loop_pipelining]] while (i < 10) {
    // Advance the induction variable so the loop terminates.
    a1[i++] += 3;
  }

  // CHECK: AttributedStmt
  // CHECK-NEXT: SYCLIntelEnableLoopPipeliningAttr
  for (int i = 0; i < 10; ++i) {
    [[intel::enable_loop_pipelining]] for (int j = 0; j < 10; ++j) {
      a1[i] += a1[j];
    }
  }

  // CHECK: AttributedStmt
  // CHECK-NEXT: SYCLIntelEnableLoopPipeliningAttr
  int b = 10;
  [[intel::enable_loop_pipelining]] do {
    b = b + 1;
  } while (b < 20);

  // CHECK: AttributedStmt
  // CHECK-NEXT: SYCLIntelEnableLoopPipeliningAttr
  int c[] = {0, 1, 2, 3, 4, 5};
  [[intel::enable_loop_pipelining]] for (int n : c) { n *= 2; }
}

// Submits a single_task kernel to the queue 'q' whose body calls
// fpga_enable_loop_pipelining().
// NOTE(review): presumably this is what makes the attributed loops part of
// SYCL device code for the -fsycl-is-device run - confirm.
void foo() {
q.submit([&](handler &h) {
h.single_task<class kernel_function>([]() { fpga_enable_loop_pipelining(); });
});
}
21 changes: 21 additions & 0 deletions clang/test/SemaSYCL/intel-fpga-loops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ void foo() {
[[intel::loop_count(8)]] int m[10];
// expected-error@+1 {{'max_reinvocation_delay' attribute cannot be applied to a declaration}}
[[intel::max_reinvocation_delay(1)]] int n[10];
// expected-error@+1{{'enable_loop_pipelining' attribute cannot be applied to a declaration}}
[[intel::enable_loop_pipelining]] int o[10];
}

// Test for deprecated spelling of Intel FPGA loop attributes
Expand Down Expand Up @@ -127,6 +129,9 @@ void boo() {
// expected-error@+1 {{'max_reinvocation_delay' attribute takes one argument}}
[[intel::max_reinvocation_delay(5, 2)]] for (int i = 0; i != 10; ++i)
a[i] = 0;
// expected-error@+1 {{'enable_loop_pipelining' attribute takes no arguments}}
[[intel::enable_loop_pipelining(0)]] for (int i = 0; i != 10; ++i)
a[i] = 0;
}

// Test for incorrect argument value for Intel FPGA loop attributes
Expand Down Expand Up @@ -227,6 +232,10 @@ void goo() {
// expected-error@+1 {{integral constant expression must have integral or unscoped enumeration type, not 'const char[8]'}}
[[intel::max_reinvocation_delay("test123")]] for (int i = 0; i != 10; ++i)
a[i] = 0;

// no diagnostics are expected
[[intel::enable_loop_pipelining]] for (int i = 0; i != 10; ++i)
a[i] = 0;
}

// Test for Intel FPGA loop attributes duplication
Expand Down Expand Up @@ -350,6 +359,11 @@ void zoo() {
// expected-error@+1{{duplicate Intel FPGA loop attribute 'max_reinvocation_delay'}}
[[intel::max_reinvocation_delay(1)]] for (int i = 0; i != 10; ++i)
a[i] = 0;

[[intel::enable_loop_pipelining]]
// expected-error@+1 {{duplicate Intel FPGA loop attribute 'enable_loop_pipelining'}}
[[intel::enable_loop_pipelining]] for (int i = 0; i != 10; ++i)
a[i] = 0;
}

// Test for Intel FPGA loop attributes compatibility
Expand Down Expand Up @@ -394,6 +408,13 @@ void loop_attrs_compatibility() {
// expected-note@+1 {{conflicting attribute is here}}
[[intel::max_reinvocation_delay(1)]] [[intel::disable_loop_pipelining]] for (int i = 0; i != 10; ++i)
a[i] = 0;
// no diagnostics are expected
[[intel::enable_loop_pipelining]] [[intel::loop_coalesce]] for (int i = 0; i != 10; ++i)
a[i] = 0;
// expected-error@+2 {{'disable_loop_pipelining' and 'enable_loop_pipelining' attributes are not compatible}}
// expected-note@+1 {{conflicting attribute is here}}
[[intel::enable_loop_pipelining]] [[intel::disable_loop_pipelining]] for (int i = 0; i != 10; ++i)
a[i] = 0;
}

template<int A, int B, int C>
Expand Down