Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions python/pypto/ir/op/block_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,26 @@ def move(
return _ir_core.create_op_call("block.move", args, kwargs, actual_span)


def ub_copy(
    tile: Expr,
    span: Optional[Span] = None,
) -> Call:
    """Build a ``block.ub_copy`` call that copies a tile inside UB (Unified Buffer).

    The op is meant exclusively for UB→UB copies: source and destination are
    both expected to live in UB memory. Use move() for any other memory
    transfer pattern. This builder performs no memory-space check itself; it
    only constructs the call expression.

    Args:
        tile: Input tile (TileType) in UB memory
        span: Optional source span for debugging (auto-captured if not provided)

    Returns:
        Call expression that returns a TileType in UB memory space
    """
    # Resolve the span first so an explicitly supplied one wins over auto-capture.
    resolved_span = _get_span_or_capture(span)
    # The op takes the tile as its single positional argument and has no kwargs.
    positional_args = [tile]
    return _ir_core.create_op_call("block.ub_copy", positional_args, {}, resolved_span)


def get_block_idx(span: Optional[Span] = None) -> Call:
"""Get the current block index.

Expand Down
4 changes: 4 additions & 0 deletions python/pypto/language/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def scalar_func(x: pl.Scalar[pl.FP32]) -> pl.Scalar[pl.FP32]:
col_expand_div,
col_expand_mul,
col_expand_sub,
create_tile,
expands,
l0c_store,
load,
Expand All @@ -74,6 +75,7 @@ def scalar_func(x: pl.Scalar[pl.FP32]) -> pl.Scalar[pl.FP32]:
sqrt,
store,
sum,
ub_copy,
)
from .op.tensor_ops import assemble, create, dim
from .op.unified_ops import (
Expand Down Expand Up @@ -147,10 +149,12 @@ def scalar_func(x: pl.Scalar[pl.FP32]) -> pl.Scalar[pl.FP32]:
"row_max",
"row_sum",
# Promoted block-only
"create_tile",
"load",
"store",
"l0c_store",
"move",
"ub_copy",
"neg",
"sqrt",
"rsqrt",
Expand Down
4 changes: 3 additions & 1 deletion python/pypto/language/op/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
col_expand_div,
col_expand_mul,
col_expand_sub,
create_tile,
expands,
l0c_store,
load,
Expand All @@ -53,6 +54,7 @@
sqrt,
store,
sum,
ub_copy,
)

# Promoted tensor-only ops (accessible as pl.create, etc.)
Expand All @@ -62,7 +64,6 @@
from .unified_ops import (
add,
cast,
create_tile,
div,
exp,
matmul,
Expand Down Expand Up @@ -102,6 +103,7 @@
"store",
"l0c_store",
"move",
"ub_copy",
"neg",
"sqrt",
"rsqrt",
Expand Down
18 changes: 18 additions & 0 deletions python/pypto/language/op/block_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
"store",
"l0c_store",
"move",
"ub_copy",
"full",
"get_block_idx",
"add",
Expand Down Expand Up @@ -188,6 +189,23 @@ def move(tile: Tile, target_memory: int, transpose: bool = False) -> Tile:
return Tile(expr=call_expr)


def ub_copy(tile: Tile) -> Tile:
    """Copy a tile from UB (Unified Buffer) memory to UB memory.

    Specialized UB→UB copy: both the source and the destination must be on UB.
    For every other kind of memory transfer, use move() with the
    target_memory parameter instead.

    Args:
        tile: Input tile (must be in UB memory)

    Returns:
        Tile wrapping the ub_copy operation (result is in UB memory)
    """
    # Unwrap to the underlying IR expression, build the IR op, then re-wrap it.
    return Tile(expr=_ir_ops.ub_copy(tile.unwrap()))


def full(shape: list[int], dtype: DataType, value: Union[int, float]) -> Tile:
"""Create a tile from a shape and fill with value in UB.

Expand Down
44 changes: 44 additions & 0 deletions src/backend/910B_CCE/backend_910b_cce_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,18 @@ static std::string MakeBlockL0CStoreCodegenCCE(const ir::CallPtr& op, codegen::C
static std::string MakeBlockMoveCodegenCCE(const ir::CallPtr& op, codegen::CodegenBase& codegen_base) {
auto& codegen = dynamic_cast<codegen::CCECodegen&>(codegen_base);
CHECK(op->args_.size() == 1) << "block.move requires 1 argument: src";

// Validate memory locations: block.move does not handle UB→UB copies
auto src_type = ir::As<ir::TileType>(op->args_[0]->GetType());
INTERNAL_CHECK(src_type != nullptr) << "Internal error: block.move source must be TileType";
INTERNAL_CHECK(src_type->memref_.has_value())
<< "Internal error: block.move source TileType must have MemRef (InitMemRef pass should have run)";

int target_memory = op->GetKwarg<int>("target_memory");
ir::MemorySpace src_mem = src_type->memref_.value()->memory_space_;
CHECK(!(src_mem == ir::MemorySpace::UB && target_memory == 1))
<< "block.move: UB to UB move should use block.ub_copy";

std::string src = codegen.GetExprAsCode(op->args_[0]);
std::string dst = codegen.GetCurrentResultTarget();

Expand All @@ -256,6 +268,32 @@ static std::string MakeBlockMoveCodegenCCE(const ir::CallPtr& op, codegen::Codeg
return "";
}

// Helper function for block.ub_copy (UB to UB copy only)
// Code generator for block.ub_copy.
//
// Expects exactly one argument: a TileType expression that already carries a MemRef
// located in UB memory. Emits a single TMOV from that source into the current result
// target. The instruction is emitted through the codegen; the returned string is
// always empty.
static std::string MakeBlockUbCopyCodegenCCE(const ir::CallPtr& op, codegen::CodegenBase& codegen_base) {
  auto& cce = dynamic_cast<codegen::CCECodegen&>(codegen_base);
  CHECK(op->args_.size() == 1) << "block.ub_copy requires 1 argument: src";

  // The operand must be a tile whose MemRef is present before its memory space is read.
  const auto src_type = ir::As<ir::TileType>(op->args_[0]->GetType());
  INTERNAL_CHECK(src_type != nullptr) << "Internal error: block.ub_copy source must be TileType";
  INTERNAL_CHECK(src_type->memref_.has_value())
      << "Internal error: block.ub_copy source TileType must have MemRef (InitMemRef pass should have run)";

  // Only a UB-resident source is accepted; anything else is rejected here.
  const ir::MemorySpace src_mem = src_type->memref_.value()->memory_space_;
  CHECK(src_mem == ir::MemorySpace::UB)
      << "block.ub_copy: source must be on UB memory, got " << ir::MemorySpaceToString(src_mem);

  // Resolve the textual operands: source expression first, then the result target.
  const std::string src_code = cce.GetExprAsCode(op->args_[0]);
  const std::string dst_code = cce.GetCurrentResultTarget();

  // TMOV takes the destination first, then the source.
  cce.Emit("TMOV(" + dst_code + ", " + src_code + ");");

  return std::string{};
}

// Helper function for block.alloc (no-op: allocation handled elsewhere)
static std::string MakeBlockAllocCodegenCCE(const ir::CallPtr& op, codegen::CodegenBase& codegen_base) {
(void)op;
Expand Down Expand Up @@ -508,6 +546,12 @@ REGISTER_BACKEND_OP(Backend910B_CCE, "block.move")
return MakeBlockMoveCodegenCCE(op, codegen);
});

// Backend registration for "block.ub_copy" on Backend910B_CCE: the op is assigned
// pipe type V, and its codegen callback forwards to MakeBlockUbCopyCodegenCCE.
REGISTER_BACKEND_OP(Backend910B_CCE, "block.ub_copy")
.set_pipe(ir::PipeType::V)
.f_codegen([](const ir::CallPtr& op, codegen::CodegenBase& codegen) {
return MakeBlockUbCopyCodegenCCE(op, codegen);
});

REGISTER_BACKEND_OP(Backend910B_CCE, "block.get_block_idx")
.set_pipe(ir::PipeType::V)
.f_codegen([](const ir::CallPtr& op, codegen::CodegenBase& codegen) {
Expand Down
24 changes: 24 additions & 0 deletions src/ir/op/block_ops/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,21 @@ TypePtr DeduceBlockMoveType(const std::vector<ExprPtr>& args,
return std::make_shared<TileType>(output_shape, tile_type->dtype_);
}

// Type deduction for block.ub_copy.
//
// Requires exactly one argument whose type is a TileType and yields a new TileType
// with the same shape and dtype as that argument. `kwargs` is accepted for signature
// uniformity and is not consulted. `op_name` is used only in diagnostics.
TypePtr DeduceBlockUbCopyType(const std::vector<ExprPtr>& args,
                              const std::vector<std::pair<std::string, std::any>>& kwargs,
                              const std::string& op_name) {
  (void)kwargs;

  // Arity: a single tile operand.
  CHECK(args.size() == 1) << "The operator " << op_name << " requires 1 argument, but got " << args.size();

  // Operand kind: must be a tile.
  const auto tile_type = As<TileType>(args[0]->GetType());
  CHECK(tile_type) << "The operator " << op_name << " requires first argument to be a TileType, but got "
                   << args[0]->GetType()->TypeName();

  // The result has the input's shape and element type.
  return std::make_shared<TileType>(tile_type->shape_, tile_type->dtype_);
}

TypePtr DeduceBlockAllocType(const std::vector<ExprPtr>& args,
const std::vector<std::pair<std::string, std::any>>& kwargs,
const std::string& op_name) {
Expand Down Expand Up @@ -343,6 +358,15 @@ REGISTER_OP("block.move")
return DeduceBlockMoveType(args, kwargs, "block.move");
});

// IR-level registration of "block.ub_copy": categorized as a BlockOp, declares a single
// "tile" argument, and delegates result-type deduction to DeduceBlockUbCopyType.
REGISTER_OP("block.ub_copy")
.set_op_category("BlockOp")
.set_description("Copy tile within UB (Unified Buffer) memory - UB to UB only")
.add_argument("tile", "Input tile (TileType) in UB memory")
.f_deduce_type([](const std::vector<ExprPtr>& args,
const std::vector<std::pair<std::string, std::any>>& kwargs) {
return DeduceBlockUbCopyType(args, kwargs, "block.ub_copy");
});

REGISTER_OP("block.alloc")
.set_op_category("BlockOp")
.set_description("Allocate memory for a MemRef object")
Expand Down
Loading