Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ readme = { file = "README.md", content-type = "text/markdown" }
license = "LicenseRef-CANN-Open-Software-License-Agreement-Version-2.0"
license-files = ["LICENSE"]
requires-python = ">=3.9"
dependencies = []
dependencies = ["ptoas"]
keywords = ["python", "pto", "tile", "tile-centric"]
classifiers = [
"Development Status :: 4 - Beta",
Expand Down
40 changes: 39 additions & 1 deletion python/pypto/ir/compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
"""High-level API functions for PyPTO IR compilation."""

import os
import shutil
import subprocess
from datetime import datetime
from typing import Optional

Expand All @@ -21,6 +23,38 @@
from .pass_manager import OptimizationStrategy, PassManager


def _run_ptoas(
pto_path: str,
output_path: str,
ptoas_flags: Optional[list[str]] = None,
) -> None:
"""Run the ptoas tool to compile a .pto file to C++.
Requires the ``ptoas`` package (``pip install ptoas``).

Args:
pto_path: Path to the input .pto file
output_path: Path for the output .cpp file
ptoas_flags: Additional flags to pass to ptoas (optional)

Raises:
FileNotFoundError: If the ptoas binary is not found in PATH
RuntimeError: If ptoas compilation fails
"""
resolved_bin = shutil.which("ptoas")
if not resolved_bin:
raise FileNotFoundError(
"ptoas binary not found in PATH. Please install the ptoas package: pip install ptoas"
)

cmd = [resolved_bin, pto_path, "-o", output_path]
if ptoas_flags:
cmd.extend(ptoas_flags)

result = subprocess.run(cmd, capture_output=True, text=True, check=False)
if result.returncode != 0:
raise RuntimeError(f"ptoas compilation failed: {result.stderr.strip()}")


def compile(
program: _ir_core.Program,
output_dir: Optional[str] = None,
Expand All @@ -34,7 +68,8 @@ def compile(
1. Runs optimization passes via PassManager
2. Optionally dumps IR before and after each pass (if dump_passes=True)
3. Generates code via selected backend (PTO or CCE)
4. Saves all artifacts to a unified output directory
4. For PTO backend, optionally invokes ptoas to compile .pto to .cpp
5. Saves all artifacts to a unified output directory

Args:
program: Input Program to compile
Expand Down Expand Up @@ -76,6 +111,9 @@ def compile(
pto_path = os.path.join(output_dir, "output.pto")
with open(pto_path, "w") as f:
f.write(pto_code)
# Run ptoas with --enable-insert-sync
cpp_path = os.path.join(output_dir, "output.cpp")
_run_ptoas(pto_path, cpp_path, ptoas_flags=["--enable-insert-sync"])
elif backend_type == BackendType.CCE:
codegen_instance = _codegen_core.CCECodegen()
files = codegen_instance.generate(transformed_program) # type: ignore[arg-type]
Expand Down
34 changes: 17 additions & 17 deletions reference/pto-isa/addc-pto-ir.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,23 @@ module {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c32 = arith.constant 32 : index
%0 = pto.make_tensor_view %arg0, shape = [%c32, %c32] strides = [%c32, %c1] : !pto.tensor_view<2xf32>
%1 = pto.make_tensor_view %arg1, shape = [%c32, %c32] strides = [%c32, %c1] : !pto.tensor_view<2xf32>
%2 = pto.make_tensor_view %arg2, shape = [%c32, %c32] strides = [%c32, %c1] : !pto.tensor_view<2xf32>
%3 = pto.make_tensor_view %arg3, shape = [%c32, %c32] strides = [%c32, %c1] : !pto.tensor_view<2xf32>
%4 = pto.subview %0, offsets = [%c0, %c0], sizes = [%c32, %c32] : !pto.tensor_view<2xf32> -> !pto.tile_view<32x32xf32>
%5 = pto.subview %1, offsets = [%c0, %c0], sizes = [%c32, %c32] : !pto.tensor_view<2xf32> -> !pto.tile_view<32x32xf32>
%6 = pto.subview %2, offsets = [%c0, %c0], sizes = [%c32, %c32] : !pto.tensor_view<2xf32> -> !pto.tile_view<32x32xf32>
%7 = pto.alloc_tile : <loc=ub, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>
%8 = pto.alloc_tile : <loc=ub, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>
%9 = pto.alloc_tile : <loc=ub, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>
%10 = pto.alloc_tile : <loc=ub, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>
pto.tload ins(%4 : !pto.tile_view<32x32xf32>) outs(%7 : !pto.tile_buf<loc=ub, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>)
pto.tload ins(%5 : !pto.tile_view<32x32xf32>) outs(%8 : !pto.tile_buf<loc=ub, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>)
pto.tload ins(%6 : !pto.tile_view<32x32xf32>) outs(%9 : !pto.tile_buf<loc=ub, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>)
pto.taddc ins(%7, %8, %9 : !pto.tile_buf<loc=ub, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>, !pto.tile_buf<loc=ub, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>, !pto.tile_buf<loc=ub, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>) outs(%10 : !pto.tile_buf<loc=ub, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>)
%11 = pto.subview %3, offsets = [%c0, %c0], sizes = [%c32, %c32] : !pto.tensor_view<2xf32> -> !pto.tile_view<32x32xf32>
pto.tstore ins(%10 : !pto.tile_buf<loc=ub, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>) outs(%11 : !pto.tile_view<32x32xf32>)
%0 = pto.make_tensor_view %arg0, shape = [%c32, %c32] strides = [%c32, %c1] : !pto.tensor_view<32x32xf32>
%1 = pto.make_tensor_view %arg1, shape = [%c32, %c32] strides = [%c32, %c1] : !pto.tensor_view<32x32xf32>
%2 = pto.make_tensor_view %arg2, shape = [%c32, %c32] strides = [%c32, %c1] : !pto.tensor_view<32x32xf32>
%3 = pto.make_tensor_view %arg3, shape = [%c32, %c32] strides = [%c32, %c1] : !pto.tensor_view<32x32xf32>
%4 = pto.partition_view %0, offsets = [%c0, %c0], sizes = [%c32, %c32] : !pto.tensor_view<32x32xf32> -> !pto.partition_tensor_view<32x32xf32>
%5 = pto.partition_view %1, offsets = [%c0, %c0], sizes = [%c32, %c32] : !pto.tensor_view<32x32xf32> -> !pto.partition_tensor_view<32x32xf32>
%6 = pto.partition_view %2, offsets = [%c0, %c0], sizes = [%c32, %c32] : !pto.tensor_view<32x32xf32> -> !pto.partition_tensor_view<32x32xf32>
%7 = pto.alloc_tile : !pto.tile_buf<loc=vec, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>
%8 = pto.alloc_tile : !pto.tile_buf<loc=vec, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>
%9 = pto.alloc_tile : !pto.tile_buf<loc=vec, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>
%10 = pto.alloc_tile : !pto.tile_buf<loc=vec, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>
pto.tload ins(%4 : !pto.partition_tensor_view<32x32xf32>) outs(%7 : !pto.tile_buf<loc=vec, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>)
pto.tload ins(%5 : !pto.partition_tensor_view<32x32xf32>) outs(%8 : !pto.tile_buf<loc=vec, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>)
pto.tload ins(%6 : !pto.partition_tensor_view<32x32xf32>) outs(%9 : !pto.tile_buf<loc=vec, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>)
pto.taddc ins(%7, %8, %9 : !pto.tile_buf<loc=vec, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>, !pto.tile_buf<loc=vec, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>, !pto.tile_buf<loc=vec, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>) outs(%10 : !pto.tile_buf<loc=vec, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>)
%11 = pto.partition_view %3, offsets = [%c0, %c0], sizes = [%c32, %c32] : !pto.tensor_view<32x32xf32> -> !pto.partition_tensor_view<32x32xf32>
pto.tstore ins(%10 : !pto.tile_buf<loc=vec, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>) outs(%11 : !pto.partition_tensor_view<32x32xf32>)
return
}
}
Expand Down
20 changes: 10 additions & 10 deletions reference/pto-isa/adds-pto-ir.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,16 @@ module {
%c1 = arith.constant 1 : index
%c32 = arith.constant 32 : index
%cst = arith.constant 3.140000e+00 : f32
%0 = pto.make_tensor_view %arg0, shape = [%c32, %c32] strides = [%c32, %c1] : !pto.tensor_view<2xf32>
%1 = pto.make_tensor_view %arg1, shape = [%c32, %c32] strides = [%c32, %c1] : !pto.tensor_view<2xf32>
%2 = pto.subview %0, offsets = [%c0, %c0], sizes = [%c32, %c32] : !pto.tensor_view<2xf32> -> !pto.tile_view<32x32xf32>
%3 = pto.subview %1, offsets = [%c0, %c0], sizes = [%c32, %c32] : !pto.tensor_view<2xf32> -> !pto.tile_view<32x32xf32>
%4 = pto.alloc_tile : <loc=ub, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>
%5 = pto.alloc_tile : <loc=ub, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>
pto.tload ins(%2 : !pto.tile_view<32x32xf32>) outs(%4 : !pto.tile_buf<loc=ub, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>)
pto.tadds ins(%4, %cst : !pto.tile_buf<loc=ub, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>, f32) outs(%5 : !pto.tile_buf<loc=ub, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>)
%6 = pto.subview %1, offsets = [%c0, %c0], sizes = [%c32, %c32] : !pto.tensor_view<2xf32> -> !pto.tile_view<32x32xf32>
pto.tstore ins(%5 : !pto.tile_buf<loc=ub, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>) outs(%6 : !pto.tile_view<32x32xf32>)
%0 = pto.make_tensor_view %arg0, shape = [%c32, %c32] strides = [%c32, %c1] : !pto.tensor_view<?x?xf32>
%1 = pto.make_tensor_view %arg1, shape = [%c32, %c32] strides = [%c32, %c1] : !pto.tensor_view<?x?xf32>
%2 = pto.partition_view %0, offsets = [%c0, %c0], sizes = [%c32, %c32] : !pto.tensor_view<?x?xf32> -> !pto.partition_tensor_view<32x32xf32>
%3 = pto.partition_view %1, offsets = [%c0, %c0], sizes = [%c32, %c32] : !pto.tensor_view<?x?xf32> -> !pto.partition_tensor_view<32x32xf32>
%4 = pto.alloc_tile : !pto.tile_buf<loc=vec, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>
%5 = pto.alloc_tile : !pto.tile_buf<loc=vec, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>
pto.tload ins(%2 : !pto.partition_tensor_view<32x32xf32>) outs(%4 : !pto.tile_buf<loc=vec, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>)
pto.tadds ins(%4, %cst : !pto.tile_buf<loc=vec, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>, f32) outs(%5 : !pto.tile_buf<loc=vec, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>)
%6 = pto.partition_view %1, offsets = [%c0, %c0], sizes = [%c32, %c32] : !pto.tensor_view<?x?xf32> -> !pto.partition_tensor_view<32x32xf32>
pto.tstore ins(%5 : !pto.tile_buf<loc=vec, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>) outs(%6 : !pto.partition_tensor_view<32x32xf32>)
return
}
}
Expand Down
26 changes: 13 additions & 13 deletions reference/pto-isa/mul-pto-ir.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,19 @@ module {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c32 = arith.constant 32 : index
%0 = pto.make_tensor_view %arg0, shape = [%c32, %c32] strides = [%c32, %c1] : !pto.tensor_view<2xf32>
%1 = pto.make_tensor_view %arg1, shape = [%c32, %c32] strides = [%c32, %c1] : !pto.tensor_view<2xf32>
%2 = pto.make_tensor_view %arg2, shape = [%c32, %c32] strides = [%c32, %c1] : !pto.tensor_view<2xf32>
%3 = pto.subview %0, offsets = [%c0, %c0], sizes = [%c32, %c32] : !pto.tensor_view<2xf32> -> !pto.tile_view<32x32xf32>
%4 = pto.subview %1, offsets = [%c0, %c0], sizes = [%c32, %c32] : !pto.tensor_view<2xf32> -> !pto.tile_view<32x32xf32>
%5 = pto.alloc_tile : <loc=ub, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>
%6 = pto.alloc_tile : <loc=ub, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>
%7 = pto.alloc_tile : <loc=ub, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>
pto.tload ins(%3 : !pto.tile_view<32x32xf32>) outs(%5 : !pto.tile_buf<loc=ub, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>)
pto.tload ins(%4 : !pto.tile_view<32x32xf32>) outs(%6 : !pto.tile_buf<loc=ub, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>)
pto.tmul ins(%5 : !pto.tile_buf<loc=ub, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>, %6 : !pto.tile_buf<loc=ub, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>) outs(%7 : !pto.tile_buf<loc=ub, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>)
%8 = pto.subview %2, offsets = [%c0, %c0], sizes = [%c32, %c32] : !pto.tensor_view<2xf32> -> !pto.tile_view<32x32xf32>
pto.tstore ins(%7 : !pto.tile_buf<loc=ub, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>) outs(%8 : !pto.tile_view<32x32xf32>)
%0 = pto.make_tensor_view %arg0, shape = [%c32, %c32] strides = [%c32, %c1] : !pto.tensor_view<?x?xf32>
%1 = pto.make_tensor_view %arg1, shape = [%c32, %c32] strides = [%c32, %c1] : !pto.tensor_view<?x?xf32>
%2 = pto.make_tensor_view %arg2, shape = [%c32, %c32] strides = [%c32, %c1] : !pto.tensor_view<?x?xf32>
%3 = pto.partition_view %0, offsets = [%c0, %c0], sizes = [%c32, %c32] : !pto.tensor_view<?x?xf32> -> !pto.partition_tensor_view<32x32xf32>
%4 = pto.partition_view %1, offsets = [%c0, %c0], sizes = [%c32, %c32] : !pto.tensor_view<?x?xf32> -> !pto.partition_tensor_view<32x32xf32>
%5 = pto.alloc_tile : !pto.tile_buf<loc=vec, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>
%6 = pto.alloc_tile : !pto.tile_buf<loc=vec, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>
%7 = pto.alloc_tile : !pto.tile_buf<loc=vec, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>
pto.tload ins(%3 : !pto.partition_tensor_view<32x32xf32>) outs(%5 : !pto.tile_buf<loc=vec, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>)
pto.tload ins(%4 : !pto.partition_tensor_view<32x32xf32>) outs(%6 : !pto.tile_buf<loc=vec, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>)
pto.tmul ins(%5, %6 : !pto.tile_buf<loc=vec, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>, !pto.tile_buf<loc=vec, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>) outs(%7 : !pto.tile_buf<loc=vec, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>)
%8 = pto.partition_view %2, offsets = [%c0, %c0], sizes = [%c32, %c32] : !pto.tensor_view<?x?xf32> -> !pto.partition_tensor_view<32x32xf32>
pto.tstore ins(%7 : !pto.tile_buf<loc=vec, dtype=f32, rows=32, cols=32, v_row=32, v_col=32, blayout=row_major, slayout=none_box, fractal=512, pad=0>) outs(%8 : !pto.partition_tensor_view<32x32xf32>)
return
}
}
Expand Down