Skip to content

Commit e1e8950

Browse files
vsemenov368 authored and igcbot committed
Report LSC stores with non-default L1 cache controls in VC
Set zebin flag if a kernel has a store with non-default L1 cache controls
1 parent 4a2ee08 commit e1e8950

File tree

5 files changed

+138
-28
lines changed

5 files changed

+138
-28
lines changed

IGC/VectorCompiler/igcdeps/src/cmc.cpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -834,6 +834,9 @@ static void setExecutionInfo(const GenXOCLRuntimeInfo::KernelInfo &BackendInfo,
834834
ExecEnv.NumGRFRequired = JitterInfo.stats.numGRFTotal;
835835
ExecEnv.RequireDisableEUFusion = BackendInfo.requireDisableEUFusion();
836836
ExecEnv.IndirectStatelessCount = BackendInfo.getIndirectCount();
837+
ExecEnv.HasLscStoresWithNonDefaultL1CacheControls =
838+
BackendInfo.hasLscStoresWithNonDefaultL1CacheControls();
839+
837840

838841
// Allocate spill-fill buffer
839842
if (JitterInfo.hasStackcalls) {

IGC/VectorCompiler/include/vc/GenXCodeGen/GenXOCLRuntimeInfo.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -180,6 +180,7 @@ class GenXOCLRuntimeInfo : public ModulePass {
180180
bool UsesReadWriteImages = false;
181181
bool UsesSample = false;
182182
bool DisableMidThreadPreemption = false;
183+
bool HasLscStoresWithNonDefaultL1CacheControls = false;
183184

184185
unsigned GRFSizeInBytes;
185186
unsigned NumBarriers = 0;
@@ -277,6 +278,9 @@ class GenXOCLRuntimeInfo : public ModulePass {
277278
bool usesSample() const { return FuncInfo.UsesSample; }
278279
bool usesReadWriteImages() const { return FuncInfo.UsesReadWriteImages; }
279280
bool requireDisableEUFusion() const { return FuncInfo.DisableEUFusion; }
281+
bool hasLscStoresWithNonDefaultL1CacheControls() const {
282+
return FuncInfo.HasLscStoresWithNonDefaultL1CacheControls;
283+
}
280284

281285
// Arguments accessors.
282286
arg_iterator arg_begin() { return ArgInfos.begin(); }

IGC/VectorCompiler/include/vc/InternalIntrinsics/Intrinsic_definitions.py

Lines changed: 66 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
# ========================== begin_copyright_notice ============================
22
#
3-
# Copyright (C) 2022-2024 Intel Corporation
3+
# Copyright (C) 2022-2025 Intel Corporation
44
#
55
# SPDX-License-Identifier: MIT
66
#
@@ -105,8 +105,8 @@
105105
##
106106
## * Return value: private/local/global pointer
107107
##
108-
## This intrisic attempts to explicitly convert a generic ptr to a
109-
## private/local/global ptr. If the cast fails the intrisic returns null pointer.
108+
## This intrinsic attempts to explicitly convert a generic ptr to a
109+
## private/local/global ptr. If the cast fails the intrinsic returns null pointer.
110110
"cast_to_ptr_explicit" : { "result": "anyptr",
111111
"arguments": ["ptr_generic"],
112112
"attributes": "None",
@@ -216,11 +216,15 @@
216216
"atomic_fmin" : { "result": "anyfloat",
217217
"arguments": [ "anyptr", "int", "int",
218218
"anyfloat"],
219-
"attributes": "SideEffects", },
219+
"attributes": "None",
220+
"memory_effects":
221+
{ "access": "ModRef" }, },
220222
"atomic_fmax" : { "result": "anyfloat",
221223
"arguments": [ "anyptr", "int", "int",
222224
"anyfloat"],
223-
"attributes": "SideEffects", },
225+
"attributes": "None",
226+
"memory_effects":
227+
{ "access": "ModRef" }, },
224228

225229
## ``llvm.vc.internal.rsqrtm`` : computes component-wise reciprocal square root
226230
## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -302,7 +306,9 @@
302306
"target" : [
303307
"hasLSCMessages",
304308
],
305-
"attributes": "SideEffects", },
309+
"attributes": "None",
310+
"memory_effects":
311+
{ "access": "ModRef" }, },
306312
"lsc_atomic_bss": { "result": "anyvector",
307313
"arguments": [
308314
"anyint", # vNxi1, predicate
@@ -321,7 +327,9 @@
321327
"target" : [
322328
"hasLSCMessages",
323329
],
324-
"attributes": "SideEffects", },
330+
"attributes": "None",
331+
"memory_effects":
332+
{ "access": "ModRef" }, },
325333
"lsc_atomic_slm": { "result": "anyvector",
326334
"arguments": [
327335
"anyint", # vNxi1, predicate
@@ -340,7 +348,9 @@
340348
"target" : [
341349
"hasLSCMessages",
342350
],
343-
"attributes": "SideEffects", },
351+
"attributes": "None",
352+
"memory_effects":
353+
{ "access": "ModRef" }, },
344354
"lsc_atomic_ugm": { "result": "anyvector",
345355
"arguments": [
346356
"anyint", # vNxi1, predicate
@@ -359,7 +369,9 @@
359369
"target" : [
360370
"hasLSCMessages",
361371
],
362-
"attributes": "SideEffects", },
372+
"attributes": "None",
373+
"memory_effects":
374+
{ "access": "ModRef" }, },
363375

364376
## ``llvm.vc.internal.lsc.load.*`` : LSC load intrinsics
365377
## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -849,7 +861,7 @@
849861
"anyint", # cache controls
850862
"char", # number of blocks
851863
"short", # block width
852-
"short", # block heigth
864+
"short", # block height
853865
"long", # memory base address
854866
"int", # memory matrix width (minus 1)
855867
"int", # memory matrix height (minus 1)
@@ -873,7 +885,7 @@
873885
"anyint", # cache controls
874886
"char", # number of blocks
875887
"short", # block width
876-
"short", # block heigth
888+
"short", # block height
877889
"long", # memory base address
878890
"int", # memory matrix width (minus 1)
879891
"int", # memory matrix height (minus 1)
@@ -897,7 +909,7 @@
897909
"anyint", # cache controls
898910
"char", # number of blocks
899911
"short", # block width
900-
"short", # block heigth
912+
"short", # block height
901913
"long", # memory base address
902914
"int", # memory matrix width (minus 1)
903915
"int", # memory matrix height (minus 1)
@@ -921,7 +933,7 @@
921933
"anyint", # cache controls
922934
"char", # number of blocks
923935
"short", # block width
924-
"short", # block heigth
936+
"short", # block height
925937
"long", # memory base address
926938
"int", # memory matrix width (minus 1)
927939
"int", # memory matrix height (minus 1)
@@ -942,7 +954,7 @@
942954
"anyint", # cache controls
943955
"char", # number of blocks
944956
"short", # block width
945-
"short", # block heigth
957+
"short", # block height
946958
"long", # memory base address
947959
"int", # memory matrix width (minus 1)
948960
"int", # memory matrix height (minus 1)
@@ -1324,7 +1336,7 @@
13241336
## * arg0: vNi1 Predicate (overloaded)
13251337
## * arg1: i16, Opcode [MBC]
13261338
## * arg2: i8, Channel mask [MBC]
1327-
## * arg3: i16, Address offset packed immediates (aoffimmi) [MBC]
1339+
## * arg3: i16, Address offset packed immediate (aoffimmi) [MBC]
13281340
## * arg4: i32, Surface BTI
13291341
## * arg5: vector to take values for masked simd lanes from
13301342
## * arg6: vNi32 or vNi16, first sampler message parameter (overloaded)
@@ -1607,7 +1619,9 @@
16071619
"target" : [
16081620
"!noLegacyDataport"
16091621
],
1610-
"attributes" : "SideEffects" },
1622+
"attributes": "None",
1623+
"memory_effects":
1624+
{ "access": "ModRef" }, },
16111625
"typed_atomic_sub_predef_surface" : { "result" : "anyvector",
16121626
"arguments" : [
16131627
"anyvector",
@@ -1621,7 +1635,9 @@
16211635
"target" : [
16221636
"!noLegacyDataport"
16231637
],
1624-
"attributes" : "SideEffects" },
1638+
"attributes": "None",
1639+
"memory_effects":
1640+
{ "access": "ModRef" }, },
16251641
"typed_atomic_min_predef_surface" : { "result" : "anyvector",
16261642
"arguments" : [
16271643
"anyvector",
@@ -1635,7 +1651,9 @@
16351651
"target" : [
16361652
"!noLegacyDataport"
16371653
],
1638-
"attributes" : "SideEffects" },
1654+
"attributes": "None",
1655+
"memory_effects":
1656+
{ "access": "ModRef" }, },
16391657
"typed_atomic_max_predef_surface" : { "result" : "anyvector",
16401658
"arguments" : [
16411659
"anyvector",
@@ -1649,7 +1667,9 @@
16491667
"target" : [
16501668
"!noLegacyDataport"
16511669
],
1652-
"attributes" : "SideEffects" },
1670+
"attributes": "None",
1671+
"memory_effects":
1672+
{ "access": "ModRef" }, },
16531673

16541674
"typed_atomic_xchg_predef_surface" : { "result" : "anyvector",
16551675
"arguments" : [
@@ -1664,7 +1684,9 @@
16641684
"target" : [
16651685
"!noLegacyDataport"
16661686
],
1667-
"attributes" : "SideEffects" },
1687+
"attributes": "None",
1688+
"memory_effects":
1689+
{ "access": "ModRef" }, },
16681690
"typed_atomic_and_predef_surface" : { "result" : "anyvector",
16691691
"arguments" : [
16701692
"anyvector",
@@ -1678,7 +1700,9 @@
16781700
"target" : [
16791701
"!noLegacyDataport"
16801702
],
1681-
"attributes" : "SideEffects" },
1703+
"attributes": "None",
1704+
"memory_effects":
1705+
{ "access": "ModRef" }, },
16821706
"typed_atomic_or_predef_surface" : { "result" : "anyvector",
16831707
"arguments" : [
16841708
"anyvector",
@@ -1692,7 +1716,9 @@
16921716
"target" : [
16931717
"!noLegacyDataport"
16941718
],
1695-
"attributes" : "SideEffects" },
1719+
"attributes": "None",
1720+
"memory_effects":
1721+
{ "access": "ModRef" }, },
16961722

16971723

16981724
"typed_atomic_xor_predef_surface" : { "result" : "anyvector",
@@ -1708,7 +1734,9 @@
17081734
"target" : [
17091735
"!noLegacyDataport"
17101736
],
1711-
"attributes" : "SideEffects" },
1737+
"attributes": "None",
1738+
"memory_effects":
1739+
{ "access": "ModRef" }, },
17121740
"typed_atomic_imin_predef_surface" : { "result" : "anyvector",
17131741
"arguments" : [
17141742
"anyvector",
@@ -1722,7 +1750,9 @@
17221750
"target" : [
17231751
"!noLegacyDataport"
17241752
],
1725-
"attributes" : "SideEffects" },
1753+
"attributes": "None",
1754+
"memory_effects":
1755+
{ "access": "ModRef" }, },
17261756
"typed_atomic_imax_predef_surface" : { "result" : "anyvector",
17271757
"arguments" : [
17281758
"anyvector",
@@ -1736,7 +1766,9 @@
17361766
"target" : [
17371767
"!noLegacyDataport"
17381768
],
1739-
"attributes" : "SideEffects" },
1769+
"attributes": "None",
1770+
"memory_effects":
1771+
{ "access": "ModRef" }, },
17401772

17411773
## ``llvm.vc.internal.typed.atomic.*.predef.surface.*`` : legacy atomic typed predefined surface
17421774
## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -1766,7 +1798,9 @@
17661798
"target" : [
17671799
"!noLegacyDataport"
17681800
],
1769-
"attributes" : "SideEffects" },
1801+
"attributes": "None",
1802+
"memory_effects":
1803+
{ "access": "ModRef" }, },
17701804
"typed_atomic_dec_predef_surface" : { "result" : "anyvector",
17711805
"arguments" : [
17721806
"anyvector", # predicate
@@ -1779,7 +1813,9 @@
17791813
"target" : [
17801814
"!noLegacyDataport"
17811815
],
1782-
"attributes" : "SideEffects" },
1816+
"attributes": "None",
1817+
"memory_effects":
1818+
{ "access": "ModRef" }, },
17831819

17841820
## ``llvm.vc.internal.typed.atomic.*.predef.surface.cmpxchg.*`` : legacy atomic typed CMPXCHG predefined surface
17851821
## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -1813,7 +1849,9 @@
18131849
"target" : [
18141850
"!noLegacyDataport"
18151851
],
1816-
"attributes" : "SideEffects" },
1852+
"attributes": "None",
1853+
"memory_effects":
1854+
{ "access": "ModRef" }, },
18171855

18181856
## ``llvm.vc.internal.gather4.typed.predef.surface.*`` : legacy cmask typed load predefined surface
18191857
## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

IGC/VectorCompiler/lib/GenXCodeGen/GenXOCLRuntimeInfo.cpp

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -378,6 +378,21 @@ void GenXOCLRuntimeInfo::FunctionInfo::initInstructionLevelProperties(
378378
UsesDPAS = true;
379379
break;
380380
}
381+
382+
// a store intrinsic
383+
if (auto *CI = dyn_cast<CallInst>(&Inst);
384+
CI && !CI->doesNotAccessMemory() && !CI->onlyReadsMemory()) {
385+
// a store intrinsic has cache opt
386+
if (auto CacheOptsIndex =
387+
vc::InternalIntrinsic::getMemoryCacheControlOperandIndex(IID);
388+
CacheOptsIndex >= 0) {
389+
auto *CacheOpts = cast<Constant>(Inst.getOperand(CacheOptsIndex));
390+
auto *L1Opt = cast<ConstantInt>(CacheOpts->getAggregateElement(0u));
391+
HasLscStoresWithNonDefaultL1CacheControls |=
392+
static_cast<LSC_CACHE_OPT>(L1Opt->getZExtValue()) !=
393+
LSC_CACHING_DEFAULT;
394+
}
395+
}
381396
}
382397
}
383398

Lines changed: 50 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,50 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
;
9+
; REQUIRES: regkeys, ptl-supported
10+
; RUN: llvm-as %s -opaque-pointers=0 -o %t.bc
11+
; RUN: ocloc -device ptl -llvm_input -options "-vc-codegen -igc_opts 'ShaderDumpEnable=1, DumpToCustomDir=%t'" -output_no_suffix -file %t.bc
12+
; RUN: cat %t/*.zeinfo | FileCheck %s
13+
14+
declare void @llvm.vc.internal.lsc.store.ugm.v8i1.v2i8.v8i64.v8i32(<8 x i1>, i8, i8, i8, <2 x i8>, i64, <8 x i64>, i16, i32, <8 x i32>)
15+
declare <16 x i32> @llvm.vc.internal.lsc.atomic.ugm.v16i32.v16i1.v2i8.v16i64(<16 x i1>, i8, i8, i8, <2 x i8>, i32, <16 x i64>, i16, i32, <16 x i32>, <16 x i32>, <16 x i32>)
16+
declare void @llvm.vc.internal.lsc.store.ugm.v16i1.v2i8.v16i32.v16i64(<16 x i1>, i8, i8, i8, <2 x i8>, i64, <16 x i64>, i16, i32, <16 x i32>)
17+
18+
; CHECK-LABEL: - name: test_uncached
19+
; CHECK: execution_env:
20+
; CHECK: has_lsc_stores_with_non_default_l1_cache_controls: true
21+
22+
define dllexport spir_kernel void @test_uncached() #0 {
23+
tail call void @llvm.vc.internal.lsc.store.ugm.v8i1.v2i8.v8i64.v8i32(<8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i8 3, i8 3, i8 1, <2 x i8> <i8 1, i8 3>, i64 0, <8 x i64> zeroinitializer, i16 1, i32 0, <8 x i32> zeroinitializer)
24+
ret void
25+
}
26+
27+
; CHECK-LABEL: - name: test_default
28+
; CHECK: execution_env:
29+
; CHECK-NOT: has_lsc_stores_with_non_default_l1_cache_controls: true
30+
31+
define dllexport spir_kernel void @test_default() #0 {
32+
tail call void @llvm.vc.internal.lsc.store.ugm.v8i1.v2i8.v8i64.v8i32(<8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i8 3, i8 3, i8 1, <2 x i8> zeroinitializer, i64 0, <8 x i64> zeroinitializer, i16 1, i32 0, <8 x i32> zeroinitializer)
33+
ret void
34+
}
35+
36+
; CHECK-LABEL: - name: test_both
37+
; CHECK: execution_env:
38+
; CHECK: has_lsc_stores_with_non_default_l1_cache_controls: true
39+
40+
define dllexport spir_kernel void @test_both() #0 {
41+
%ret = call <16 x i32> @llvm.vc.internal.lsc.atomic.ugm.v16i32.v16i1.v2i8.v16i64(<16 x i1><i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i8 12, i8 3, i8 6, <2 x i8> <i8 1, i8 3>, i32 0, <16 x i64> zeroinitializer, i16 1, i32 0, <16 x i32> zeroinitializer, <16 x i32> undef, <16 x i32> zeroinitializer)
42+
call void @llvm.vc.internal.lsc.store.ugm.v16i1.v2i8.v16i32.v16i64(<16 x i1><i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i8 3, i8 6, i8 1, <2 x i8> zeroinitializer, i64 0, <16 x i64> zeroinitializer, i16 1, i32 0, <16 x i32> %ret)
43+
ret void
44+
}
45+
46+
attributes #0 = { noinline "VCFunction" }
47+
48+
!spirv.Source = !{!0}
49+
50+
!0 = !{i32 0, i32 100000}

0 commit comments

Comments
 (0)