Skip to content

Commit 4147b8d

Browse files
authored
[SYCL] Add new FPGA loop attribute enable_loop_pipelining (#9263)
This patch adds support for a new FPGA loop attribute called enable_loop_pipelining, which is similar to disable_loop_pipelining. This attribute is useful in the low-area flow, in which all loops are unpipelined by default and the user may wish to enable loop pipelining. The [[intel::enable_loop_pipelining]] attribute is applicable to loops in SYCL device code. If a loop has this attribute, the LLVM IR loop will carry the llvm.loop.intel.pipelining.enable metadata with value 1, as shown below:

    br .... !llvm.loop !0
    !0 = !{!1}
    !1 = !{!"llvm.loop.intel.pipelining.enable", i32 1}

An error is emitted if both [[intel::enable_loop_pipelining]] and [[intel::disable_loop_pipelining]] are applied to the same loop.

---------

Signed-off-by: Soumi Manna <soumi.manna@intel.com>
1 parent b11316a commit 4147b8d

File tree

8 files changed

+180
-4
lines changed

8 files changed

+180
-4
lines changed

clang/include/clang/Basic/Attr.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2509,6 +2509,17 @@ def SYCLIntelMaxReinvocationDelay : StmtAttr {
25092509
def : MutualExclusions<[SYCLIntelDisableLoopPipelining,
25102510
SYCLIntelMaxReinvocationDelay]>;
25112511

2512+
def SYCLIntelEnableLoopPipelining : StmtAttr {
2513+
let Spellings = [CXX11<"intel", "enable_loop_pipelining">];
2514+
let Subjects = SubjectList<[ForStmt, CXXForRangeStmt, WhileStmt, DoStmt],
2515+
ErrorDiag, "'for', 'while', and 'do' statements">;
2516+
let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost];
2517+
let Documentation = [SYCLIntelEnableLoopPipeliningAttrDocs];
2518+
}
2519+
2520+
def : MutualExclusions<[SYCLIntelDisableLoopPipelining,
2521+
SYCLIntelEnableLoopPipelining]>;
2522+
25122523
def SYCLIntelLocalNonConstVar : SubsetSubject<Var,
25132524
[{S->hasLocalStorage() &&
25142525
S->getKind() != Decl::ImplicitParam &&

clang/include/clang/Basic/AttrDocs.td

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3334,7 +3334,7 @@ disables pipelining of the loop or function data path, causing the loop
33343334
or function to be executed serially. Cannot be used on the same loop or
33353335
function, or in conjunction with ``max_interleaving``,
33363336
``speculated_iterations``, ``max_concurrency``, ``initiation_interval``,
3337-
``ivdep``, or ``max_reinvocation_delay``.
3337+
``ivdep``, ``max_reinvocation_delay``, or ``enable_loop_pipelining`` attributes.
33383338

33393339
.. code-block:: c++
33403340

@@ -3492,6 +3492,43 @@ conjunction with disable_loop_pipelining.
34923492
}];
34933493
}
34943494

3495+
def SYCLIntelEnableLoopPipeliningAttrDocs : Documentation {
3496+
let Category = DocCatVariable;
3497+
let Heading = "intel::enable_loop_pipelining";
3498+
let Content = [{
3499+
The ``enable_loop_pipelining`` attribute applies to a loop in SYCL device code.
3500+
Takes no arguments and enables pipelining of the loop. This attribute is useful
3501+
in the low-area flow, in which all loops are unpipelined by default. Cannot be
3502+
used on the same loop in conjunction with the ``disable_loop_pipelining`` attribute.
3503+
3504+
.. code-block:: c++
3505+
3506+
void bar() {
3507+
int a[10];
3508+
[[intel::enable_loop_pipelining]] for (int i = 0; i != 10; ++i) a[i] = 0;
3509+
}
3510+
3511+
void Array(int *array, size_t n) {
3512+
[[intel::enable_loop_pipelining]] for (int i = 0; i < n; ++i) array[i] = 0;
3513+
}
3514+
3515+
void count () {
3516+
int a1[10], i = 0;
3517+
[[intel::enable_loop_pipelining]] while (i < 10) {
3518+
a1[i++] = 3;
3519+
}
3520+
}
3521+
3522+
void check() {
3523+
int a = 10;
3524+
[[intel::enable_loop_pipelining]] do {
3525+
a = a + 1;
3526+
} while (a < 20);
3527+
}
3528+
3529+
}];
3530+
}
3531+
34953532
def SYCLIntelLoopFuseDocs : Documentation {
34963533
let Category = DocCatFunction;
34973534
let Heading = "loop_fuse, loop_fuse_independent";

clang/lib/CodeGen/CGLoopInfo.cpp

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -622,6 +622,15 @@ MDNode *LoopInfo::createMetadata(
622622
LoopProperties.push_back(MDNode::get(Ctx, Vals));
623623
}
624624

625+
// enable_loop_pipelining attribute corresponds to
626+
// 'llvm.loop.intel.pipelining.enable, i32 1' metadata
627+
if (Attrs.SYCLLoopPipeliningEnable) {
628+
Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.intel.pipelining.enable"),
629+
ConstantAsMetadata::get(
630+
ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
631+
LoopProperties.push_back(MDNode::get(Ctx, Vals));
632+
}
633+
625634
LoopProperties.insert(LoopProperties.end(), AdditionalLoopProperties.begin(),
626635
AdditionalLoopProperties.end());
627636
return createFullUnrollMetadata(Attrs, LoopProperties, HasUserTransforms);
@@ -635,7 +644,7 @@ LoopAttributes::LoopAttributes(bool IsParallel)
635644
VectorizeScalable(LoopAttributes::Unspecified), InterleaveCount(0),
636645
SYCLIInterval(0), SYCLLoopCoalesceEnable(false),
637646
SYCLLoopCoalesceNLevels(0), SYCLLoopPipeliningDisable(false),
638-
UnrollCount(0), UnrollAndJamCount(0),
647+
SYCLLoopPipeliningEnable(false), UnrollCount(0), UnrollAndJamCount(0),
639648
DistributeEnable(LoopAttributes::Unspecified), PipelineDisabled(false),
640649
PipelineInitiationInterval(0), SYCLNofusionEnable(false),
641650
MustProgress(false) {}
@@ -656,6 +665,7 @@ void LoopAttributes::clear() {
656665
SYCLSpeculatedIterationsNIterations.reset();
657666
SYCLIntelFPGAVariantCount.clear();
658667
SYCLMaxReinvocationDelayNCycles.reset();
668+
SYCLLoopPipeliningEnable = false;
659669
UnrollCount = 0;
660670
UnrollAndJamCount = 0;
661671
VectorizeEnable = LoopAttributes::Unspecified;
@@ -693,8 +703,8 @@ LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs,
693703
!Attrs.SYCLSpeculatedIterationsNIterations &&
694704
Attrs.SYCLIntelFPGAVariantCount.empty() && Attrs.UnrollCount == 0 &&
695705
!Attrs.SYCLMaxReinvocationDelayNCycles &&
696-
Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled &&
697-
Attrs.PipelineInitiationInterval == 0 &&
706+
!Attrs.SYCLLoopPipeliningEnable && Attrs.UnrollAndJamCount == 0 &&
707+
!Attrs.PipelineDisabled && Attrs.PipelineInitiationInterval == 0 &&
698708
Attrs.VectorizePredicateEnable == LoopAttributes::Unspecified &&
699709
Attrs.VectorizeEnable == LoopAttributes::Unspecified &&
700710
Attrs.UnrollEnable == LoopAttributes::Unspecified &&
@@ -1027,6 +1037,8 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
10271037
// For attribute max_reinvocation_delay:
10281038
// n - 'llvm.loop.intel.max_reinvocation_delay.count, i32 n' metadata will be
10291039
// emitted
1040+
// For attribute enable_loop_pipelining:
1041+
// 'llvm.loop.intel.pipelining.enable, i32 1' metadata will be emitted
10301042
for (const auto *A : Attrs) {
10311043
if (const auto *SYCLIntelIVDep = dyn_cast<SYCLIntelIVDepAttr>(A))
10321044
addSYCLIVDepInfo(Header->getContext(), SYCLIntelIVDep->getSafelenValue(),
@@ -1099,6 +1111,9 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
10991111
llvm::APSInt ArgVal = CE->getResultAsAPSInt();
11001112
setSYCLMaxReinvocationDelayNCycles(ArgVal.getSExtValue());
11011113
}
1114+
1115+
if (isa<SYCLIntelEnableLoopPipeliningAttr>(A))
1116+
setSYCLLoopPipeliningEnable();
11021117
}
11031118

11041119
setMustProgress(MustProgress);

clang/lib/CodeGen/CGLoopInfo.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,9 @@ struct LoopAttributes {
137137
// Value for llvm.loop.intel.max_reinvocation_delay metadata.
138138
llvm::Optional<unsigned> SYCLMaxReinvocationDelayNCycles;
139139

140+
/// Flag for llvm.loop.intel.pipelining.enable, i32 1 metadata.
141+
bool SYCLLoopPipeliningEnable;
142+
140143
/// llvm.unroll.
141144
unsigned UnrollCount;
142145

@@ -418,6 +421,11 @@ class LoopInfoStack {
418421
StagedAttrs.SYCLMaxReinvocationDelayNCycles = C;
419422
}
420423

424+
/// Set flag of enable_loop_pipelining for the next loop pushed.
425+
void setSYCLLoopPipeliningEnable() {
426+
StagedAttrs.SYCLLoopPipeliningEnable = true;
427+
}
428+
421429
private:
422430
/// Returns true if there is LoopInfo on the stack.
423431
bool hasInfo() const { return !Active.empty(); }

clang/lib/Sema/SemaStmtAttr.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,11 @@ static Attr * handleSYCLIntelMaxReinvocationDelayAttr(Sema &S, Stmt *St,
481481
return S.BuildSYCLIntelMaxReinvocationDelayAttr(A, E);
482482
}
483483

484+
static Attr *handleSYCLIntelEnableLoopPipeliningAttr(Sema &S, Stmt *,
485+
const ParsedAttr &A) {
486+
return new (S.Context) SYCLIntelEnableLoopPipeliningAttr(S.Context, A);
487+
}
488+
484489
static Attr *handleLoopHintAttr(Sema &S, Stmt *St, const ParsedAttr &A,
485490
SourceRange) {
486491
IdentifierLoc *PragmaNameLoc = A.getArgAsIdent(0);
@@ -895,6 +900,8 @@ static void CheckForIncompatibleSYCLLoopAttributes(
895900
CheckForDuplicationSYCLLoopAttribute<SYCLIntelNofusionAttr>(S, Attrs);
896901
CheckForDuplicationSYCLLoopAttribute<SYCLIntelMaxReinvocationDelayAttr>(
897902
S, Attrs);
903+
CheckForDuplicationSYCLLoopAttribute<SYCLIntelEnableLoopPipeliningAttr>(
904+
S, Attrs);
898905
}
899906

900907
void CheckForIncompatibleUnrollHintAttributes(
@@ -1042,6 +1049,8 @@ static Attr *ProcessStmtAttribute(Sema &S, Stmt *St, const ParsedAttr &A,
10421049
return handleIntelNofusionAttr(S, St, A);
10431050
case ParsedAttr::AT_SYCLIntelMaxReinvocationDelay:
10441051
return handleSYCLIntelMaxReinvocationDelayAttr(S, St, A);
1052+
case ParsedAttr::AT_SYCLIntelEnableLoopPipelining:
1053+
return handleSYCLIntelEnableLoopPipeliningAttr(S, St, A);
10451054
default:
10461055
// N.B., ClangAttrEmitter.cpp emits a diagnostic helper that ensures a
10471056
// declaration attribute is not written on a statement, but this code is

clang/test/CodeGenSYCL/intel-fpga-loops.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@
2323
// CHECK: br label %for.cond, !llvm.loop ![[MD_MRD:[0-9]+]]
2424
// CHECK: br label %for.cond2, !llvm.loop ![[MD_MRD_2:[0-9]+]]
2525
// CHECK: br label %for.cond13, !llvm.loop ![[MD_MRD_3:[0-9]+]]
26+
// CHECK: br label %for.cond, !llvm.loop ![[MD_FP:[0-9]+]]
27+
// CHECK: br label %while.cond, !llvm.loop ![[MD_FP_1:[0-9]+]]
28+
// CHECK: br i1 %cmp5, label %do.body, label %do.end, !llvm.loop ![[MD_FP_2:[0-9]+]]
2629

2730
void disable_loop_pipelining() {
2831
int a[10];
@@ -171,6 +174,27 @@ void max_reinvocation_delay() {
171174
a[i] = 0;
172175
}
173176

177+
// Add codeGen tests for loop attribute: [[intel::enable_loop_pipelining]].
178+
void fpga_enable_loop_pipelining() {
179+
int a[10];
180+
// CHECK: ![[MD_FP]] = distinct !{![[MD_FP]], ![[MP]], ![[MD_fpga_pipeline:[0-9]+]]}
181+
// CHECK-NEXT: ![[MD_fpga_pipeline]] = !{!"llvm.loop.intel.pipelining.enable", i32 1}
182+
[[intel::enable_loop_pipelining]] for (int i = 0; i != 10; ++i)
183+
a[i] = 0;
184+
185+
// CHECK: ![[MD_FP_1]] = distinct !{![[MD_FP_1]], ![[MP]], ![[MD_fpga_pipeline]]}
186+
int j = 0;
187+
[[intel::enable_loop_pipelining]] while (j < 10) {
188+
a[j++] = 3;
189+
}
190+
191+
// CHECK: ![[MD_FP_2]] = distinct !{![[MD_FP_2]], ![[MP]], ![[MD_fpga_pipeline]]}
192+
int b = 10;
193+
[[intel::enable_loop_pipelining]] do {
194+
b = b + 1;
195+
} while (b < 20);
196+
}
197+
174198
template <typename name, typename Func>
175199
__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) {
176200
kernelFunc();
@@ -187,6 +211,7 @@ int main() {
187211
speculated_iterations<4, 0>();
188212
loop_count_control<12>();
189213
max_reinvocation_delay<3, 1>();
214+
fpga_enable_loop_pipelining();
190215
});
191216
return 0;
192217
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -Wno-sycl-2017-compat -ast-dump %s | FileCheck %s
2+
3+
// Add AST tests for Loop attribute: [[intel::enable_loop_pipelining]].
4+
5+
#include "sycl.hpp"
6+
7+
using namespace sycl;
8+
queue q;
9+
10+
void fpga_enable_loop_pipelining() {
11+
int a1[10], a2[10];
12+
// CHECK: AttributedStmt
13+
// CHECK-NEXT: SYCLIntelEnableLoopPipeliningAttr
14+
[[intel::enable_loop_pipelining]] for (int p = 0; p < 10; ++p) {
15+
a1[p] = a2[p] = 0;
16+
}
17+
18+
// CHECK: AttributedStmt
19+
// CHECK-NEXT: SYCLIntelEnableLoopPipeliningAttr
20+
int i = 0;
21+
[[intel::enable_loop_pipelining]] while (i < 10) {
22+
a1[i] += 3;
23+
}
24+
25+
// CHECK: AttributedStmt
26+
// CHECK-NEXT: SYCLIntelEnableLoopPipeliningAttr
27+
for (int i = 0; i < 10; ++i) {
28+
[[intel::enable_loop_pipelining]] for (int j = 0; j < 10; ++j) {
29+
a1[i] += a1[j];
30+
}
31+
}
32+
33+
// CHECK: AttributedStmt
34+
// CHECK-NEXT: SYCLIntelEnableLoopPipeliningAttr
35+
int b = 10;
36+
[[intel::enable_loop_pipelining]] do {
37+
b = b + 1;
38+
} while (b < 20);
39+
40+
// CHECK: AttributedStmt
41+
// CHECK-NEXT: SYCLIntelEnableLoopPipeliningAttr
42+
int c[] = {0, 1, 2, 3, 4, 5};
43+
[[intel::enable_loop_pipelining]] for (int n : c) { n *= 2; }
44+
}
45+
46+
void foo() {
47+
q.submit([&](handler &h) {
48+
h.single_task<class kernel_function>([]() { fpga_enable_loop_pipelining(); });
49+
});
50+
}

clang/test/SemaSYCL/intel-fpga-loops.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ void foo() {
2828
[[intel::loop_count(8)]] int m[10];
2929
// expected-error@+1 {{'max_reinvocation_delay' attribute cannot be applied to a declaration}}
3030
[[intel::max_reinvocation_delay(1)]] int n[10];
31+
// expected-error@+1{{'enable_loop_pipelining' attribute cannot be applied to a declaration}}
32+
[[intel::enable_loop_pipelining]] int o[10];
3133
}
3234

3335
// Test for deprecated spelling of Intel FPGA loop attributes
@@ -127,6 +129,9 @@ void boo() {
127129
// expected-error@+1 {{'max_reinvocation_delay' attribute takes one argument}}
128130
[[intel::max_reinvocation_delay(5, 2)]] for (int i = 0; i != 10; ++i)
129131
a[i] = 0;
132+
// expected-error@+1 {{'enable_loop_pipelining' attribute takes no arguments}}
133+
[[intel::enable_loop_pipelining(0)]] for (int i = 0; i != 10; ++i)
134+
a[i] = 0;
130135
}
131136

132137
// Test for incorrect argument value for Intel FPGA loop attributes
@@ -227,6 +232,10 @@ void goo() {
227232
// expected-error@+1 {{integral constant expression must have integral or unscoped enumeration type, not 'const char[8]'}}
228233
[[intel::max_reinvocation_delay("test123")]] for (int i = 0; i != 10; ++i)
229234
a[i] = 0;
235+
236+
// no diagnostics are expected
237+
[[intel::enable_loop_pipelining]] for (int i = 0; i != 10; ++i)
238+
a[i] = 0;
230239
}
231240

232241
// Test for Intel FPGA loop attributes duplication
@@ -350,6 +359,11 @@ void zoo() {
350359
// expected-error@+1{{duplicate Intel FPGA loop attribute 'max_reinvocation_delay'}}
351360
[[intel::max_reinvocation_delay(1)]] for (int i = 0; i != 10; ++i)
352361
a[i] = 0;
362+
363+
[[intel::enable_loop_pipelining]]
364+
// expected-error@+1 {{duplicate Intel FPGA loop attribute 'enable_loop_pipelining'}}
365+
[[intel::enable_loop_pipelining]] for (int i = 0; i != 10; ++i)
366+
a[i] = 0;
353367
}
354368

355369
// Test for Intel FPGA loop attributes compatibility
@@ -394,6 +408,13 @@ void loop_attrs_compatibility() {
394408
// expected-note@+1 {{conflicting attribute is here}}
395409
[[intel::max_reinvocation_delay(1)]] [[intel::disable_loop_pipelining]] for (int i = 0; i != 10; ++i)
396410
a[i] = 0;
411+
// no diagnostics are expected
412+
[[intel::enable_loop_pipelining]] [[intel::loop_coalesce]] for (int i = 0; i != 10; ++i)
413+
a[i] = 0;
414+
// expected-error@+2 {{'disable_loop_pipelining' and 'enable_loop_pipelining' attributes are not compatible}}
415+
// expected-note@+1 {{conflicting attribute is here}}
416+
[[intel::enable_loop_pipelining]] [[intel::disable_loop_pipelining]] for (int i = 0; i != 10; ++i)
417+
a[i] = 0;
397418
}
398419

399420
template<int A, int B, int C>

0 commit comments

Comments
 (0)