-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[mlir][ROCDL] Plumb through AMDGPU memory access metadata #110916
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
The LLVM backend has moved from function-wide attributes for making assurances about potentially unsafe atomic operations (like "unsafe-fp-atomics") to metadata on individual atomic operations. This commit adds support for generating this metadata from MLIR.
@llvm/pr-subscribers-mlir-llvm Author: Krzysztof Drewniak (krzysz00) Changes: The LLVM backend has moved from function-wide attributes for making assurances about potentially unsafe atomic operations (like "unsafe-fp-atomics") to metadata on individual atomic operations. This commit adds support for generating this metadata from MLIR. Full diff: https://github.com/llvm/llvm-project/pull/110916.diff 4 Files Affected:
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
index 2da45eba77655b..fae2fe9cc3f8d6 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
@@ -1055,6 +1055,7 @@ def LLVM_ConstantRangeAttr : LLVM_Attr<"ConstantRange", "constant_range"> {
Syntax:
```
`<` `i`(width($lower)) $lower `,` $upper `>`
+ ```
}];
let builders = [
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index aae2cf88ded041..1d515b2b7c801c 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -58,7 +58,12 @@ def ROCDL_Dialect : Dialect {
"::mlir::StringAttr":$flat_work_group_size,
"::mlir::IntegerAttr":$max_flat_work_group_size,
"::mlir::IntegerAttr":$waves_per_eu,
- "::mlir::BoolAttr":$unsafe_fp_atomics
+ "::mlir::BoolAttr":$unsafe_fp_atomics,
+ // Correspond to LLVM metadata of the same name
+ "::mlir::UnitAttr":$last_use,
+ "::mlir::UnitAttr":$no_remote_memory,
+ "::mlir::UnitAttr":$no_fine_grained_memory,
+ "::mlir::UnitAttr":$ignore_denormal_mode
);
let useDefaultAttributePrinterParser = 1;
@@ -88,7 +93,7 @@ class ROCDL_IntrPure1Op<string mnemonic> :
class ROCDL_IntrOp<string mnemonic, list<int> overloadedResults,
list<int> overloadedOperands, list<Trait> traits, int numResults,
- int requiresAccessGroup = 0, int requiresAliasAnalysis = 0, list<int> immArgPositions = [],
+ int requiresAccessGroup = 0, int requiresAliasAnalysis = 0, list<int> immArgPositions = [],
list<string> immArgAttrNames = []> :
LLVM_IntrOpBase<ROCDL_Dialect, mnemonic,
"amdgcn_" # !subst(".", "_", mnemonic), overloadedResults,
diff --git a/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp
index ec21fbf714c24a..88a9d4c2a7ef23 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp
@@ -77,6 +77,7 @@ class ROCDLDialectLLVMIRTranslationInterface
NamedAttribute attribute,
LLVM::ModuleTranslation &moduleTranslation) const final {
auto *dialect = dyn_cast<ROCDL::ROCDLDialect>(attribute.getNameDialect());
+ llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
if (dialect->getKernelAttrHelper().getName() == attribute.getName()) {
auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
if (!func)
@@ -198,7 +199,6 @@ class ROCDLDialectLLVMIRTranslationInterface
if (!value)
return op->emitOpError(Twine(attribute.getName()) +
" must be a dense i32 array attribute");
- llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
SmallVector<llvm::Metadata *, 3> metadata;
llvm::Type *i32 = llvm::IntegerType::get(llvmContext, 32);
for (int32_t i : value.asArrayRef()) {
@@ -210,6 +210,31 @@ class ROCDLDialectLLVMIRTranslationInterface
llvm::MDNode *node = llvm::MDNode::get(llvmContext, metadata);
llvmFunc->setMetadata("reqd_work_group_size", node);
}
+
+ // Atomic and nontemporal metadata
+ if (dialect->getLastUseAttrHelper().getName() == attribute.getName()) {
+ for (llvm::Instruction *i : instructions)
+ i->setMetadata("amdgpu.last.use", llvm::MDNode::get(llvmContext, {}));
+ }
+ if (dialect->getNoRemoteMemoryAttrHelper().getName() ==
+ attribute.getName()) {
+ for (llvm::Instruction *i : instructions)
+ i->setMetadata("amdgpu.no.remote.memory",
+ llvm::MDNode::get(llvmContext, {}));
+ }
+ if (dialect->getNoFineGrainedMemoryAttrHelper().getName() ==
+ attribute.getName()) {
+ for (llvm::Instruction *i : instructions)
+ i->setMetadata("amdgpu.no.fine.grained.memory",
+ llvm::MDNode::get(llvmContext, {}));
+ }
+ if (dialect->getIgnoreDenormalModeAttrHelper().getName() ==
+ attribute.getName()) {
+ for (llvm::Instruction *i : instructions)
+ i->setMetadata("amdgpu.ignore.denormal.mode",
+ llvm::MDNode::get(llvmContext, {}));
+ }
+
return success();
}
};
diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir
index 08c2d4e6477970..97276b087b7e93 100644
--- a/mlir/test/Target/LLVMIR/rocdl.mlir
+++ b/mlir/test/Target/LLVMIR/rocdl.mlir
@@ -564,11 +564,34 @@ llvm.func @rocdl_8bit_floats(%source: i32, %stoch: i32) -> i32 {
}
llvm.func @rocdl_16bit_packed_floats(%sourceA: f32, %sourceB: f32) -> vector<2xf16> {
+ // CHECK-LABEL: @rocdl_16bit_packed_floats
// CHECK: call <2 x half> @llvm.amdgcn.cvt.pkrtz(float {{.*}}, float {{.*}})
%source = rocdl.cvt.pkrtz %sourceA, %sourceB : vector<2xf16>
llvm.return %source : vector<2xf16>
}
+llvm.func @rocdl_atomic_attrs(%ptr: !llvm.ptr<1>, %data: f32) {
+ // CHECK-LABEL: @rocdl_atomic_attrs
+ // CHECK: atomicrmw
+ // CHECK-SAME: !amdgpu.ignore.denormal.mode
+ // CHECK-SAME: !amdgpu.no.fine.grained.memory
+ // CHECK-SAME: !amdgpu.no.remote.memory
+ llvm.atomicrmw fadd %ptr, %data monotonic {
+ rocdl.ignore_denormal_mode,
+ rocdl.no_fine_grained_memory,
+ rocdl.no_remote_memory} : !llvm.ptr<1>, f32
+ llvm.return
+}
+
+llvm.func @rocdl_last_use(%ptr: !llvm.ptr<1>) -> i32 {
+ // CHECK-LABEL: @rocdl_last_use
+ // CHECK: %[[ret:.+]] = load
+ // CHECK-SAME: !amdgpu.last.use
+ // CHECK: ret i32 %[[ret]]
+ %ret = llvm.load %ptr {rocdl.last_use} : !llvm.ptr<1> -> i32
+ llvm.return %ret : i32
+}
+
// CHECK-DAG: attributes #[[$KERNEL_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" "uniform-work-group-size"="true" }
// CHECK-DAG: attributes #[[$KERNEL_WORKGROUP_ATTRS]] = { "amdgpu-flat-work-group-size"="1,1024"
// CHECK-DAG: attributes #[[$KNOWN_BLOCK_SIZE_ATTRS]] = { "amdgpu-flat-work-group-size"="128,128"
|
@llvm/pr-subscribers-mlir Author: Krzysztof Drewniak (krzysz00) Changes: The LLVM backend has moved from function-wide attributes for making assurances about potentially unsafe atomic operations (like "unsafe-fp-atomics") to metadata on individual atomic operations. This commit adds support for generating this metadata from MLIR. Full diff: https://github.com/llvm/llvm-project/pull/110916.diff 4 Files Affected:
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
index 2da45eba77655b..fae2fe9cc3f8d6 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
@@ -1055,6 +1055,7 @@ def LLVM_ConstantRangeAttr : LLVM_Attr<"ConstantRange", "constant_range"> {
Syntax:
```
`<` `i`(width($lower)) $lower `,` $upper `>`
+ ```
}];
let builders = [
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index aae2cf88ded041..1d515b2b7c801c 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -58,7 +58,12 @@ def ROCDL_Dialect : Dialect {
"::mlir::StringAttr":$flat_work_group_size,
"::mlir::IntegerAttr":$max_flat_work_group_size,
"::mlir::IntegerAttr":$waves_per_eu,
- "::mlir::BoolAttr":$unsafe_fp_atomics
+ "::mlir::BoolAttr":$unsafe_fp_atomics,
+ // Correspond to LLVM metadata of the same name
+ "::mlir::UnitAttr":$last_use,
+ "::mlir::UnitAttr":$no_remote_memory,
+ "::mlir::UnitAttr":$no_fine_grained_memory,
+ "::mlir::UnitAttr":$ignore_denormal_mode
);
let useDefaultAttributePrinterParser = 1;
@@ -88,7 +93,7 @@ class ROCDL_IntrPure1Op<string mnemonic> :
class ROCDL_IntrOp<string mnemonic, list<int> overloadedResults,
list<int> overloadedOperands, list<Trait> traits, int numResults,
- int requiresAccessGroup = 0, int requiresAliasAnalysis = 0, list<int> immArgPositions = [],
+ int requiresAccessGroup = 0, int requiresAliasAnalysis = 0, list<int> immArgPositions = [],
list<string> immArgAttrNames = []> :
LLVM_IntrOpBase<ROCDL_Dialect, mnemonic,
"amdgcn_" # !subst(".", "_", mnemonic), overloadedResults,
diff --git a/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp
index ec21fbf714c24a..88a9d4c2a7ef23 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp
@@ -77,6 +77,7 @@ class ROCDLDialectLLVMIRTranslationInterface
NamedAttribute attribute,
LLVM::ModuleTranslation &moduleTranslation) const final {
auto *dialect = dyn_cast<ROCDL::ROCDLDialect>(attribute.getNameDialect());
+ llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
if (dialect->getKernelAttrHelper().getName() == attribute.getName()) {
auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
if (!func)
@@ -198,7 +199,6 @@ class ROCDLDialectLLVMIRTranslationInterface
if (!value)
return op->emitOpError(Twine(attribute.getName()) +
" must be a dense i32 array attribute");
- llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
SmallVector<llvm::Metadata *, 3> metadata;
llvm::Type *i32 = llvm::IntegerType::get(llvmContext, 32);
for (int32_t i : value.asArrayRef()) {
@@ -210,6 +210,31 @@ class ROCDLDialectLLVMIRTranslationInterface
llvm::MDNode *node = llvm::MDNode::get(llvmContext, metadata);
llvmFunc->setMetadata("reqd_work_group_size", node);
}
+
+ // Atomic and nontemporal metadata
+ if (dialect->getLastUseAttrHelper().getName() == attribute.getName()) {
+ for (llvm::Instruction *i : instructions)
+ i->setMetadata("amdgpu.last.use", llvm::MDNode::get(llvmContext, {}));
+ }
+ if (dialect->getNoRemoteMemoryAttrHelper().getName() ==
+ attribute.getName()) {
+ for (llvm::Instruction *i : instructions)
+ i->setMetadata("amdgpu.no.remote.memory",
+ llvm::MDNode::get(llvmContext, {}));
+ }
+ if (dialect->getNoFineGrainedMemoryAttrHelper().getName() ==
+ attribute.getName()) {
+ for (llvm::Instruction *i : instructions)
+ i->setMetadata("amdgpu.no.fine.grained.memory",
+ llvm::MDNode::get(llvmContext, {}));
+ }
+ if (dialect->getIgnoreDenormalModeAttrHelper().getName() ==
+ attribute.getName()) {
+ for (llvm::Instruction *i : instructions)
+ i->setMetadata("amdgpu.ignore.denormal.mode",
+ llvm::MDNode::get(llvmContext, {}));
+ }
+
return success();
}
};
diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir
index 08c2d4e6477970..97276b087b7e93 100644
--- a/mlir/test/Target/LLVMIR/rocdl.mlir
+++ b/mlir/test/Target/LLVMIR/rocdl.mlir
@@ -564,11 +564,34 @@ llvm.func @rocdl_8bit_floats(%source: i32, %stoch: i32) -> i32 {
}
llvm.func @rocdl_16bit_packed_floats(%sourceA: f32, %sourceB: f32) -> vector<2xf16> {
+ // CHECK-LABEL: @rocdl_16bit_packed_floats
// CHECK: call <2 x half> @llvm.amdgcn.cvt.pkrtz(float {{.*}}, float {{.*}})
%source = rocdl.cvt.pkrtz %sourceA, %sourceB : vector<2xf16>
llvm.return %source : vector<2xf16>
}
+llvm.func @rocdl_atomic_attrs(%ptr: !llvm.ptr<1>, %data: f32) {
+ // CHECK-LABEL: @rocdl_atomic_attrs
+ // CHECK: atomicrmw
+ // CHECK-SAME: !amdgpu.ignore.denormal.mode
+ // CHECK-SAME: !amdgpu.no.fine.grained.memory
+ // CHECK-SAME: !amdgpu.no.remote.memory
+ llvm.atomicrmw fadd %ptr, %data monotonic {
+ rocdl.ignore_denormal_mode,
+ rocdl.no_fine_grained_memory,
+ rocdl.no_remote_memory} : !llvm.ptr<1>, f32
+ llvm.return
+}
+
+llvm.func @rocdl_last_use(%ptr: !llvm.ptr<1>) -> i32 {
+ // CHECK-LABEL: @rocdl_last_use
+ // CHECK: %[[ret:.+]] = load
+ // CHECK-SAME: !amdgpu.last.use
+ // CHECK: ret i32 %[[ret]]
+ %ret = llvm.load %ptr {rocdl.last_use} : !llvm.ptr<1> -> i32
+ llvm.return %ret : i32
+}
+
// CHECK-DAG: attributes #[[$KERNEL_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" "uniform-work-group-size"="true" }
// CHECK-DAG: attributes #[[$KERNEL_WORKGROUP_ATTRS]] = { "amdgpu-flat-work-group-size"="1,1024"
// CHECK-DAG: attributes #[[$KNOWN_BLOCK_SIZE_ATTRS]] = { "amdgpu-flat-work-group-size"="128,128"
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
The LLVM backend has moved from function-wide attributes for making assurances about potentially unsafe atomic operations (like "unsafe-fp-atomics") to metadata on individual atomic operations.
This commit adds support for generating this metadata from MLIR.