Skip to content

Commit

Permalink
[mlir][GPU] Implement ValueBoundsOpInterface for GPU ID operations (l…
Browse files Browse the repository at this point in the history
…lvm#122190)

The GPU ID operations already implement InferIntRangeInterface, which
gives constant lower and upper bounds on those IDs when appropriate
metadata is present on the operations or in the surrounding context.

This commit uses that existing code to implement the
ValueBoundsOpInterface, which is used when analyzing affine operations
(unlike the integer range interface, which is used for arithmetic
optimization).

It also implements the interface for gpu.launch, where we can use it to
express the constraint that block/grid sizes are equal to their value
from outside the launch op and that the corresponding IDs are bounded
above by that size.

As a consequence, the test pass for this inference is updated to work on
a FunctionOpInterface and not a func.func, creating minor churn in other
tests.
  • Loading branch information
krzysz00 authored Jan 9, 2025
1 parent 1b897f7 commit 0aa831e
Show file tree
Hide file tree
Showing 16 changed files with 317 additions and 15 deletions.
19 changes: 19 additions & 0 deletions mlir/include/mlir/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
//===- ValueBoundsOpInterfaceImpl.h - Impl. of ValueBoundsOpInterface -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_GPU_IR_VALUEBOUNDSOPINTERFACEIMPL_H
#define MLIR_DIALECT_GPU_IR_VALUEBOUNDSOPINTERFACEIMPL_H

namespace mlir {
class DialectRegistry;

namespace gpu {
/// Registers the GPU dialect's external models of ValueBoundsOpInterface with
/// `registry`. The models are added as a dialect extension and cover the GPU
/// ID/dimension operations as well as gpu.launch.
void registerValueBoundsOpInterfaceExternalModels(DialectRegistry &registry);
} // namespace gpu
} // namespace mlir
#endif // MLIR_DIALECT_GPU_IR_VALUEBOUNDSOPINTERFACEIMPL_H
2 changes: 2 additions & 0 deletions mlir/include/mlir/InitAllDialects.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include "mlir/Dialect/EmitC/IR/EmitC.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.h"
#include "mlir/Dialect/GPU/Transforms/BufferDeallocationOpInterfaceImpl.h"
#include "mlir/Dialect/IRDL/IR/IRDL.h"
#include "mlir/Dialect/Index/IR/IndexDialect.h"
Expand Down Expand Up @@ -164,6 +165,7 @@ inline void registerAllDialects(DialectRegistry &registry) {
cf::registerBufferizableOpInterfaceExternalModels(registry);
cf::registerBufferDeallocationOpInterfaceExternalModels(registry);
gpu::registerBufferDeallocationOpInterfaceExternalModels(registry);
gpu::registerValueBoundsOpInterfaceExternalModels(registry);
LLVM::registerInlinerInterface(registry);
linalg::registerAllDialectInterfaceImplementations(registry);
linalg::registerRuntimeVerifiableOpInterfaceExternalModels(registry);
Expand Down
3 changes: 2 additions & 1 deletion mlir/lib/Dialect/GPU/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
add_mlir_dialect_library(MLIRGPUDialect
IR/GPUDialect.cpp
IR/InferIntRangeInterfaceImpls.cpp
IR/ValueBoundsOpInterfaceImpl.cpp

ADDITIONAL_HEADER_DIRS
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU
Expand Down Expand Up @@ -40,7 +41,7 @@ add_mlir_dialect_library(MLIRGPUTransforms
Transforms/ShuffleRewriter.cpp
Transforms/SPIRVAttachTarget.cpp
Transforms/SubgroupReduceLowering.cpp

OBJECT

ADDITIONAL_HEADER_DIRS
Expand Down
5 changes: 5 additions & 0 deletions mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "mlir/IR/TypeUtilities.h"
#include "mlir/Interfaces/FunctionImplementation.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "mlir/Interfaces/ValueBoundsOpInterface.h"
#include "mlir/Transforms/InliningUtils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/TypeSwitch.h"
Expand Down Expand Up @@ -217,6 +218,10 @@ void GPUDialect::initialize() {
addInterfaces<GPUInlinerInterface>();
declarePromisedInterface<bufferization::BufferDeallocationOpInterface,
TerminatorOp>();
declarePromisedInterfaces<
ValueBoundsOpInterface, ClusterDimOp, ClusterDimBlocksOp, ClusterIdOp,
ClusterBlockIdOp, BlockDimOp, BlockIdOp, GridDimOp, ThreadIdOp, LaneIdOp,
SubgroupIdOp, GlobalIdOp, NumSubgroupsOp, SubgroupSizeOp, LaunchOp>();
}

static std::string getSparseHandleKeyword(SparseHandleKind kind) {
Expand Down
114 changes: 114 additions & 0 deletions mlir/lib/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
//===- ValueBoundsOpInterfaceImpl.cpp - Impl. of ValueBoundsOpInterface ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.h"

#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Interfaces/InferIntRangeInterface.h"
#include "mlir/Interfaces/ValueBoundsOpInterface.h"

using namespace mlir;
using namespace mlir::gpu;

namespace {
/// External model that derives ValueBoundsOpInterface constraints for a GPU ID
/// or dimension op from the op's existing InferIntRangeInterface
/// implementation.
///
/// ValueBoundsOpInterface only works on index-typed values, gathers a set of
/// constraint expressions, and is used for affine analyses, whereas
/// InferIntRangeInterface works on arbitrary integer types, creates [min, max]
/// ranges, and is used for arithmetic simplification.
template <typename Op>
struct GpuIdOpInterface
    : public ValueBoundsOpInterface::ExternalModel<GpuIdOpInterface<Op>, Op> {
  /// Adds the signed minimum and maximum of the range inferred for `value`
  /// (result 0 of `op`) to `cstr` as constant lower and upper bounds.
  void populateBoundsForIndexValue(Operation *op, Value value,
                                   ValueBoundsConstraintSet &cstr) const {
    auto rangeOp = cast<InferIntRangeInterface>(op);
    assert(value == op->getResult(0) &&
           "inferring for value that isn't the GPU op's result");
    assert(rangeOp->getNumOperands() == 0 && "ID ops have no operands");

    // Invoked by the range inference with the range computed for the result.
    auto addRangeAsBounds = [&](Value result, const ConstantIntRanges &range) {
      assert(result == value &&
             "GPU ID op inferring values for something that's not its result");
      const int64_t lowerBound = range.smin().getSExtValue();
      const int64_t upperBound = range.smax().getSExtValue();
      cstr.bound(result) >= lowerBound;
      cstr.bound(result) <= upperBound;
    };
    // The ops have no operands, so no argument ranges are passed.
    rangeOp.inferResultRanges({}, addRangeAsBounds);
  }
};

/// External model providing value bounds for the ID and size region arguments
/// of gpu.launch.
///
/// A size argument is constrained to equal the matching launch operand and to
/// be at least 1; an ID argument is constrained to lie in [0, size), where
/// `size` is the matching launch operand.
struct GpuLaunchOpInterface
    : public ValueBoundsOpInterface::ExternalModel<GpuLaunchOpInterface,
                                                   LaunchOp> {
  void populateBoundsForIndexValue(Operation *op, Value value,
                                   ValueBoundsConstraintSet &cstr) const {
    auto launch = cast<LaunchOp>(op);
    KernelDim3 gridOperands = launch.getGridSizeOperandValues();
    KernelDim3 blockOperands = launch.getBlockSizeOperandValues();

    // Launch operand that `value` is tied to. Remains null when `value` is
    // not one of the ID/size arguments inspected below.
    Value outerSize = nullptr;
    // Whether `value` is itself a size argument (as opposed to an ID).
    bool valueIsSize = false;

    // Looks for `value` among the x/y/z region arguments in `inner`; on a
    // hit, records the launch operand of the same dimension from `outer`.
    auto recordIfMatched = [&](const KernelDim3 &inner, const KernelDim3 &outer,
                               bool innerAreSizes) {
      const Value innerDims[3] = {inner.x, inner.y, inner.z};
      const Value outerDims[3] = {outer.x, outer.y, outer.z};
      for (unsigned dim = 0; dim < 3; ++dim) {
        if (value != innerDims[dim])
          continue;
        outerSize = outerDims[dim];
        valueIsSize = innerAreSizes;
      }
    };

    recordIfMatched(launch.getThreadIds(), blockOperands, false);
    recordIfMatched(launch.getBlockSize(), blockOperands, true);
    recordIfMatched(launch.getBlockIds(), gridOperands, false);
    recordIfMatched(launch.getGridSize(), gridOperands, true);
    if (launch.hasClusterSize()) {
      KernelDim3 clusterOperands = *launch.getClusterSizeOperandValues();
      recordIfMatched(*launch.getClusterIds(), clusterOperands, false);
      recordIfMatched(*launch.getClusterSize(), clusterOperands, true);
    }

    // Nothing to add for values that are not ID/size arguments.
    if (!outerSize)
      return;

    if (!valueIsSize) {
      // IDs: 0 <= id < size.
      cstr.bound(value) < cstr.getExpr(outerSize);
      cstr.bound(value) >= 0;
      return;
    }
    // Sizes: equal to the value passed from outside the launch, and >= 1.
    cstr.bound(value) == cstr.getExpr(outerSize);
    cstr.bound(value) >= 1;
  }
};
} // namespace

void mlir::gpu::registerValueBoundsOpInterfaceExternalModels(
DialectRegistry &registry) {
registry.addExtension(+[](MLIRContext *ctx, GPUDialect *dialect) {
#define REGISTER(X) X::attachInterface<GpuIdOpInterface<X>>(*ctx);
REGISTER(ClusterDimOp)
REGISTER(ClusterDimBlocksOp)
REGISTER(ClusterIdOp)
REGISTER(ClusterBlockIdOp)
REGISTER(BlockDimOp)
REGISTER(BlockIdOp)
REGISTER(GridDimOp)
REGISTER(ThreadIdOp)
REGISTER(LaneIdOp)
REGISTER(SubgroupIdOp)
REGISTER(GlobalIdOp)
REGISTER(NumSubgroupsOp)
REGISTER(SubgroupSizeOp)
#undef REGISTER

LaunchOp::attachInterface<GpuLaunchOpInterface>(*ctx);
});
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \
// RUN: -split-input-file | FileCheck %s

// CHECK: #[[$map:.*]] = affine_map<()[s0, s1] -> (s0 + s1)>
Expand Down
4 changes: 2 additions & 2 deletions mlir/test/Dialect/Affine/value-bounds-reification.mlir
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// RUN: mlir-opt %s -test-affine-reify-value-bounds="reify-to-func-args" \
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds{reify-to-func-args}))' \
// RUN: -verify-diagnostics -split-input-file | FileCheck %s

// RUN: mlir-opt %s -test-affine-reify-value-bounds="reify-to-func-args use-arith-ops" \
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds{reify-to-func-args use-arith-ops}))' \
// RUN: -verify-diagnostics -split-input-file | FileCheck %s --check-prefix=CHECK-ARITH

// CHECK-LABEL: func @reify_through_chain(
Expand Down
4 changes: 2 additions & 2 deletions mlir/test/Dialect/Arith/value-bounds-op-interface-impl.mlir
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \
// RUN: -verify-diagnostics -split-input-file | FileCheck %s

// RUN: mlir-opt %s -test-affine-reify-value-bounds="use-arith-ops" \
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds{use-arith-ops}))' \
// RUN: -verify-diagnostics -split-input-file | \
// RUN: FileCheck %s --check-prefix=CHECK-ARITH

Expand Down
159 changes: 159 additions & 0 deletions mlir/test/Dialect/GPU/value-bounds-op-interface-impl.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
// RUN: mlir-opt %s -pass-pipeline='builtin.module( \
// RUN: func.func(test-affine-reify-value-bounds), \
// RUN: gpu.module(llvm.func(test-affine-reify-value-bounds)), \
// RUN: gpu.module(gpu.func(test-affine-reify-value-bounds)))' \
// RUN: -verify-diagnostics \
// RUN: -split-input-file | FileCheck %s

// CHECK-LABEL: func @launch_func
// Exercises the bounds attached to the region arguments of gpu.launch: the
// grid/block sizes and the block/thread IDs. The x grid size is the dynamic
// function argument %arg0; every other launch operand is a constant.
func.func @launch_func(%arg0 : index) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c4 = arith.constant 4 : index
%c64 = arith.constant 64 : index
gpu.launch blocks(%block_id_x, %block_id_y, %block_id_z) in (%grid_dim_x = %arg0, %grid_dim_y = %c4, %grid_dim_z = %c2)
threads(%thread_id_x, %thread_id_y, %thread_id_z) in (%block_dim_x = %c64, %block_dim_y = %c4, %block_dim_z = %c2) {

// Sanity checks:
// A comparison that cannot be decided is reported as "unknown", and one that
// is provably violated is reported as "false".
// expected-error @below{{unknown}}
"test.compare" (%thread_id_x, %c1) {cmp = "EQ"} : (index, index) -> ()
// expected-remark @below{{false}}
"test.compare" (%thread_id_x, %c64) {cmp = "GE"} : (index, index) -> ()

// Grid sizes: at least 1 and equal to the corresponding launch operand.
// expected-remark @below{{true}}
"test.compare" (%grid_dim_x, %c1) {cmp = "GE"} : (index, index) -> ()
// expected-remark @below{{true}}
"test.compare" (%grid_dim_x, %arg0) {cmp = "EQ"} : (index, index) -> ()
// expected-remark @below{{true}}
"test.compare" (%grid_dim_y, %c4) {cmp = "EQ"} : (index, index) -> ()
// expected-remark @below{{true}}
"test.compare" (%grid_dim_z, %c2) {cmp = "EQ"} : (index, index) -> ()

// Block IDs: within [0, grid size) along each dimension.
// expected-remark @below{{true}}
"test.compare"(%block_id_x, %c0) {cmp = "GE"} : (index, index) -> ()
// expected-remark @below{{true}}
"test.compare"(%block_id_x, %arg0) {cmp = "LT"} : (index, index) -> ()
// expected-remark @below{{true}}
"test.compare"(%block_id_y, %c0) {cmp = "GE"} : (index, index) -> ()
// expected-remark @below{{true}}
"test.compare"(%block_id_y, %c4) {cmp = "LT"} : (index, index) -> ()
// expected-remark @below{{true}}
"test.compare"(%block_id_z, %c0) {cmp = "GE"} : (index, index) -> ()
// expected-remark @below{{true}}
"test.compare"(%block_id_z, %c2) {cmp = "LT"} : (index, index) -> ()

// Block sizes: equal to the corresponding launch operand.
// expected-remark @below{{true}}
"test.compare" (%block_dim_x, %c64) {cmp = "EQ"} : (index, index) -> ()
// expected-remark @below{{true}}
"test.compare" (%block_dim_y, %c4) {cmp = "EQ"} : (index, index) -> ()
// expected-remark @below{{true}}
"test.compare" (%block_dim_z, %c2) {cmp = "EQ"} : (index, index) -> ()

// Thread IDs: within [0, block size) along each dimension.
// expected-remark @below{{true}}
"test.compare"(%thread_id_x, %c0) {cmp = "GE"} : (index, index) -> ()
// expected-remark @below{{true}}
"test.compare"(%thread_id_x, %c64) {cmp = "LT"} : (index, index) -> ()
// expected-remark @below{{true}}
"test.compare"(%thread_id_y, %c0) {cmp = "GE"} : (index, index) -> ()
// expected-remark @below{{true}}
"test.compare"(%thread_id_y, %c4) {cmp = "LT"} : (index, index) -> ()
// expected-remark @below{{true}}
"test.compare"(%thread_id_z, %c0) {cmp = "GE"} : (index, index) -> ()
// expected-remark @below{{true}}
"test.compare"(%thread_id_z, %c2) {cmp = "LT"} : (index, index) -> ()

// The ID is also compared directly against the size region argument rather
// than against a constant.
// expected-remark @below{{true}}
"test.compare"(%thread_id_x, %block_dim_x) {cmp = "LT"} : (index, index) -> ()
gpu.terminator
}

func.return
}

// -----

// The tests for the actual range values are located in int-range-interface.mlir,
// so here we just make sure that the results of that interface propagate into
// constraints.

// CHECK-LABEL: func @kernel
module attributes {gpu.container_module} {
gpu.module @gpu_module {
// This kernel carries no block/grid size annotations, so only the bounds that
// gpu.thread_id gets without any such metadata are checked.
llvm.func @kernel() attributes {gpu.kernel} {

%c0 = arith.constant 0 : index
// 4294967295 is 2^32 - 1.
%ctid_max = arith.constant 4294967295 : index
%thread_id_x = gpu.thread_id x

// expected-remark @below{{true}}
"test.compare" (%thread_id_x, %c0) {cmp = "GE"} : (index, index) -> ()
// expected-remark @below{{true}}
"test.compare" (%thread_id_x, %ctid_max) {cmp = "LT"} : (index, index) -> ()
llvm.return
}
}
}

// -----

// CHECK-LABEL: func @annotated_kernel
module attributes {gpu.container_module} {
gpu.module @gpu_module {
// The kernel is annotated with known block and grid sizes; the checks below
// only look at the x dimension, whose known block size is 8.
gpu.func @annotated_kernel() kernel
attributes {known_block_size = array<i32: 8, 12, 16>,
known_grid_size = array<i32: 20, 24, 28>} {

%c0 = arith.constant 0 : index
%c8 = arith.constant 8 : index
%thread_id_x = gpu.thread_id x

// The thread ID lies in [0, 8).
// expected-remark @below{{true}}
"test.compare"(%thread_id_x, %c0) {cmp = "GE"} : (index, index) -> ()
// expected-remark @below{{true}}
"test.compare"(%thread_id_x, %c8) {cmp = "LT"} : (index, index) -> ()

%block_dim_x = gpu.block_dim x
// The block size is exactly 8.
// expected-remark @below{{true}}
"test.compare"(%block_dim_x, %c8) {cmp = "EQ"} : (index, index) -> ()

// With the size pinned to a constant, the ID can be proven smaller than the
// size.
// expected-remark @below{{true}}
"test.compare"(%thread_id_x, %block_dim_x) {cmp = "LT"} : (index, index) -> ()
gpu.return
}
}
}

// -----

// CHECK-LABEL: func @local_bounds_kernel
module attributes {gpu.container_module} {
gpu.module @gpu_module {
// The kernel itself has no size annotations; bounds come only from the
// upper_bound attributes on the individual ops.
gpu.func @local_bounds_kernel() kernel {

%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c8 = arith.constant 8 : index

%block_dim_x = gpu.block_dim x upper_bound 8
// The size lies in [1, 8], but it is not known to be exactly 8.
// expected-remark @below{{true}}
"test.compare"(%block_dim_x, %c1) {cmp = "GE"} : (index, index) -> ()
// expected-remark @below{{true}}
"test.compare"(%block_dim_x, %c8) {cmp = "LE"} : (index, index) -> ()
// expected-error @below{{unknown}}
"test.compare"(%block_dim_x, %c8) {cmp = "EQ"} : (index, index) -> ()

%thread_id_x = gpu.thread_id x upper_bound 8
// The ID lies in [0, 8).
// expected-remark @below{{true}}
"test.compare"(%thread_id_x, %c0) {cmp = "GE"} : (index, index) -> ()
// expected-remark @below{{true}}
"test.compare"(%thread_id_x, %c8) {cmp = "LT"} : (index, index) -> ()

// Note: there isn't a way to express the ID < size constraint
// in this form, since each op only receives constant bounds of its own.
// expected-error @below{{unknown}}
"test.compare"(%thread_id_x, %block_dim_x) {cmp = "LT"} : (index, index) -> ()
gpu.return
}
}
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \
// RUN: -split-input-file | FileCheck %s

// CHECK-LABEL: func @linalg_fill(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \
// RUN: -split-input-file | FileCheck %s

// CHECK-LABEL: func @memref_alloc(
Expand Down
2 changes: 1 addition & 1 deletion mlir/test/Dialect/SCF/value-bounds-op-interface-impl.mlir
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: mlir-opt %s -test-affine-reify-value-bounds="reify-to-func-args" \
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds{reify-to-func-args}))' \
// RUN: -verify-diagnostics -split-input-file | FileCheck %s

// CHECK-LABEL: func @scf_for(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \
// RUN: -split-input-file | FileCheck %s

func.func @unknown_op() -> index {
Expand Down
Loading

0 comments on commit 0aa831e

Please sign in to comment.