Skip to content

Commit 7089b00

Browse files
committed
Merge branch 'main' of https://github.com/tile-ai/tilelang into v2_1106
2 parents 093d237 + 777881e commit 7089b00

File tree

11 files changed

+227
-133
lines changed

11 files changed

+227
-133
lines changed

.github/workflows/ci.yml

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -129,13 +129,6 @@ jobs:
129129
echo "UV_CACHE_DIR=${XDG_CACHE_HOME}/uv" | tee -a "${GITHUB_ENV}"
130130
echo "PRE_COMMIT_HOME=${XDG_CACHE_HOME}/pip/.pre-commit" | tee -a "${GITHUB_ENV}"
131131
132-
- name: Set environment (GitHub-hosted runners)
133-
if: ${{ !startsWith(matrix.runner.name, 'self-hosted') }}
134-
run: |
135-
# Enable ccache on GitHub-hosted runners to speed up builds
136-
echo "CMAKE_C_COMPILER_LAUNCHER=ccache" | tee -a "${GITHUB_ENV}"
137-
echo "CMAKE_CXX_COMPILER_LAUNCHER=ccache" | tee -a "${GITHUB_ENV}"
138-
139132
# Do not use ccache on self-hosted runners, as it will download/upload caches which is slow.
140133
# Self-hosted runners usually have more CPU power to compile without ccache.
141134
- name: Setup ccache (GitHub-hosted runners)
@@ -144,8 +137,13 @@ jobs:
144137
uses: hendrikmuhs/ccache-action@v1
145138
with:
146139
create-symlink: true
147-
key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ matrix.python-version }}-${{ matrix.runner.name }}-${{ matrix.runner.toolkit }}
148140
evict-old-files: "7d"
141+
append-timestamp: false
142+
key: ${{ runner.os }}-${{ runner.arch }}-${{ matrix.runner.toolkit }}-${{ hashFiles('**/*.cc') }}
143+
restore-keys: |
144+
${{ runner.os }}-${{ runner.arch }}-${{ matrix.runner.toolkit }}-${{ hashFiles('**/*.cc') }}
145+
${{ runner.os }}-${{ runner.arch }}-${{ matrix.runner.toolkit }}
146+
${{ runner.os }}-${{ runner.arch }}
149147
150148
- name: Set environment (CUDA)
151149
if: contains(matrix.runner.toolkit, 'CUDA')

.github/workflows/dist.yml

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,11 @@ jobs:
4444
runs-on: macos-latest
4545
timeout-minutes: 30
4646
env:
47-
# NO_VERSION_LABEL disables embedding the toolchain / git commit hash in version metadata.
47+
# `NO_VERSION_LABEL=ON` disables embedding the toolchain / git commit hash in version metadata.
4848
# Otherwise, the version of the SDist has a git hash suffix (e.g., 0.1.0+gitabcdef12),
4949
# but the package built from the SDist has no way to get the git hash (it is not a git repo),
5050
# leading to inconsistent versions between SDist and built packages (+gitabcdef12 vs. +gitunknown).
51-
NO_VERSION_LABEL: 'OFF'
51+
NO_VERSION_LABEL: 'ON'
5252

5353
steps:
5454
- name: Checkout repository
@@ -72,18 +72,20 @@ jobs:
7272
uses: hendrikmuhs/ccache-action@v1
7373
with:
7474
create-symlink: true
75-
key: ccache-${{ runner.os }}-${{ runner.arch }}
7675
evict-old-files: "7d"
76+
append-timestamp: false
77+
key: sdist-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('**/*.cc') }}
78+
restore-keys: |
79+
sdist-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('**/*.cc') }}
80+
sdist-${{ runner.os }}-${{ runner.arch }}
81+
${{ runner.os }}-${{ runner.arch }}
7782
7883
- name: Test SDist buildable
7984
run: |
8085
TEMP_DIR="$(mktemp -d -t tilelang-sdist-test)"
8186
cp -r dist "${TEMP_DIR}/dist"
82-
uv venv --seed "${TEMP_DIR}/venv"
83-
source "${TEMP_DIR}/venv/bin/activate"
8487
cd "${TEMP_DIR}"
85-
python3 -m pip install --upgrade pip setuptools wheel
86-
python3 -m pip install -v dist/*.tar.gz
88+
uv pip install -v dist/*.tar.gz
8789
python3 -c "import tilelang; print(tilelang.__version__)"
8890
8991
- name: Upload SDist
@@ -125,14 +127,19 @@ jobs:
125127
fetch-depth: 1
126128
submodules: recursive
127129

128-
# NB: CIBW builds wheels in containers on Linux
129-
- name: Setup ccache (macOS only)
130-
if: runner.os == 'macOS'
130+
- name: Setup ccache
131131
uses: hendrikmuhs/ccache-action@v1
132132
with:
133133
create-symlink: true
134-
key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ matrix.python-version }}-${{ matrix.target.toolkit }}
135134
evict-old-files: "7d"
135+
append-timestamp: false
136+
key: wheel-${{ runner.os }}-${{ runner.arch }}-${{ matrix.target.toolkit }}-${{ hashFiles('**/*.cc') }}
137+
restore-keys: |
138+
wheel-${{ runner.os }}-${{ runner.arch }}-${{ matrix.target.toolkit }}-${{ hashFiles('**/*.cc') }}
139+
wheel-${{ runner.os }}-${{ runner.arch }}-${{ matrix.target.toolkit }}
140+
wheel-${{ runner.os }}-${{ runner.arch }}
141+
${{ runner.os }}-${{ runner.arch }}-${{ matrix.target.toolkit }}
142+
${{ runner.os }}-${{ runner.arch }}
136143
137144
- name: Set CIBW_BUILD
138145
run: |
@@ -147,6 +154,11 @@ jobs:
147154
echo "CUDA_VERSION=${CUDA_VERSION}" | tee -a "${GITHUB_ENV}"
148155
fi
149156
157+
if [[ "${{ runner.os }}" == "Linux" ]]; then
158+
HOST_CCACHE_DIR="$(ccache --get-config cache_dir)"
159+
echo "CIBW_BEFORE_BUILD_LINUX=yum install -y ccache && ccache -o cache_dir=/host${HOST_CCACHE_DIR}" | tee -a "${GITHUB_ENV}"
160+
fi
161+
150162
- name: Build wheels
151163
uses: pypa/cibuildwheel@v3.2
152164
with:

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,4 +104,4 @@ cmake-build/
104104
cmake-build-*/
105105

106106
# Git version for sdist
107-
_git_commit.txt
107+
.git_commit.txt

CMakeLists.txt

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,16 @@ endif()
4141

4242
find_program(CCACHE_PROGRAM ccache)
4343
if(CCACHE_PROGRAM)
44-
message(STATUS "Using ccache: ${CCACHE_PROGRAM}")
44+
message(STATUS "Using ccache: ${CCACHE_PROGRAM} with base_dir=${CMAKE_SOURCE_DIR}")
45+
if(APPLE)
46+
# Passing configs like `ccache base_dir=/xxx cc ...` is supported
47+
# (likely) since ccache 4.x, which has been provided by homebrew.
48+
# Our Linux builder image (manylinux2014 & manylinux_2_28) still
49+
# provides ccache 3.x, which does not support this form.
50+
# `cibuildwheel` uses a fixed folder on Linux (`/project`) as the working directory,
51+
# so cache would work without setting `base_dir`.
52+
set(CCACHE_PROGRAM "${CCACHE_PROGRAM};base_dir=${CMAKE_SOURCE_DIR}")
53+
endif()
4554
set(CMAKE_C_COMPILER_LAUNCHER "${CCACHE_PROGRAM}" CACHE STRING "C compiler launcher")
4655
set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE_PROGRAM}" CACHE STRING "CXX compiler launcher")
4756
set(CMAKE_CUDA_COMPILER_LAUNCHER "${CCACHE_PROGRAM}" CACHE STRING "CUDA compiler launcher")

testing/python/language/test_tilelang_language_frontend_v2.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import torch
44
import tilelang.testing
55
import tvm
6+
from tvm.script.ir_builder.base import IRBuilderFrame
7+
from tvm.tir.expr import IntImm, Var
68

79

810
def test_argument():
@@ -273,6 +275,43 @@ def foo() -> T.Tensor((128,), T.float32):
273275
assert isinstance(foo, T.PrimFunc)
274276

275277

278+
def test_serial_for_with_step():
# Exercises serial loops with a non-unit step: two JIT-compiled kernels are
# run and compared against reference tensors, then the Python type returned
# by T.serial is checked for several step values.
# NOTE(review): the reference tensors are created with device='cuda', so this
# test presumably needs a CUDA device -- confirm.
279+
280+
# Kernel 1: positive step of 2. The loop starting at 0 stores 1.0 into A[i],
# the loop starting at 1 stores 2.0 into A[i]; each iteration also asserts
# on-device that i is within the stated bounds and has the stated parity.
@tilelang.jit(out_idx=-1)
281+
@T.prim_func
282+
def test_stepped_serial(A: T.Tensor((10,), T.int32)):
283+
with T.Kernel(1) as _:
284+
for i in range(0, 10, 2):
285+
T.device_assert(0 <= i < 10 and i % 2 == 0, "i out of range")
286+
A[i] = 1.0
287+
for i in range(1, 10, 2):
288+
T.device_assert(1 <= i < 10 and i % 2 == 1, "i out of range")
289+
A[i] = 2.0
290+
291+
# Expected result alternates 1, 2 across the ten elements.
ker = test_stepped_serial()
292+
res = ker()
293+
ref = torch.tensor([1, 2, 1, 2, 1, 2, 1, 2, 1, 2], dtype=torch.int32, device='cuda')
294+
assert torch.all(res == ref), f"Expected {ref}, but got {res}"
295+
296+
# Kernel 2: negative step. i is written to A[10 - i] for range(10, 0, -1),
# with an on-device assertion that 0 < i <= 10.
@tilelang.jit(out_idx=-1)
297+
@T.prim_func
298+
def test_serial_step_neg(A: T.Tensor((10,), T.int32)):
299+
with T.Kernel(1) as _:
300+
for i in range(10, 0, -1):
301+
T.device_assert(0 < i <= 10, "i out of range")
302+
A[10 - i] = i
303+
304+
# Expected result counts down from 10 to 1.
ker = test_serial_step_neg()
305+
res = ker()
306+
ref = torch.tensor([10, 9, 8, 7, 6, 5, 4, 3, 2, 1], dtype=torch.int32, device='cuda')
307+
assert torch.all(res == ref), f"Expected {ref}, but got {res}"
308+
309+
# Return-type checks on T.serial: a step of 1 (Python int or IntImm) must
# yield an IRBuilderFrame, while a Var step or a step of -1 must not.
assert isinstance(T.serial(1, 10, 1), IRBuilderFrame)
310+
assert isinstance(T.serial(1, 10, IntImm('int32', 1)), IRBuilderFrame)
311+
assert not isinstance(T.serial(1, 10, Var('tmp', 'int32')), IRBuilderFrame)
312+
assert not isinstance(T.serial(10, -1, -1), IRBuilderFrame)
313+
314+
276315
def test_swap_logic():
277316

278317
@tilelang.jit

tilelang/language/__init__.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,7 @@
2323
SharedBuffer, # noqa: F401
2424
LocalBuffer, # noqa: F401
2525
)
26-
from .parallel import Parallel # noqa: F401
27-
from .pipeline import Pipelined # noqa: F401
28-
from .persistent import Persistent # noqa: F401
26+
from .loop import serial, Parallel, Persistent, Pipelined # noqa: F401
2927
from .frame import has_let_value, get_let_value # noqa: F401
3028
from .math_intrinsics import * # noqa: F401
3129
from .kernel import (

tilelang/language/loop.py

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
"""The language interface for tl programs."""
2+
from __future__ import annotations
3+
4+
from typing import Any
5+
from tvm import tir
6+
from tvm.tir import IntImm
7+
import tvm.script.ir_builder.tir as tb_tir
8+
from .v2.builder import SerialForWithStep
9+
from tilelang import _ffi_api
10+
11+
12+
def Parallel(*extents: tir.PrimExpr, coalesced_width: int | None = None):
    """Construct a nested parallel for loop.

    Useful for writing element-wise tensor expressions.

    Parameters
    ----------
    extents : PrimExpr
        The extents of the iteration, passed as positional arguments.

    coalesced_width : Optional[int]
        The coalesced width of the parallel loop. When provided it is
        forwarded as the ``"coalesced_width"`` annotation; when ``None`` the
        loop is created with an empty annotation dict.

    Returns
    -------
    res : frame.ForFrame
        The ForFrame.
    """
    # Only attach the annotation when the caller actually supplied a width.
    loop_annotations: dict[str, Any] = (
        {} if coalesced_width is None else {"coalesced_width": coalesced_width})
    return _ffi_api.Parallel(extents, loop_annotations)  # type: ignore[attr-defined] # pylint: disable=no-member
33+
34+
35+
def Persistent(
    domain: list[tir.PrimExpr],
    wave_size: tir.PrimExpr,
    index: tir.PrimExpr,
    group_size: tir.PrimExpr | None = 8,
):
    """Construct a persistent for loop.

    All arguments are forwarded unchanged to the ``Persistent`` FFI entry
    point, whose result is returned.

    Parameters
    ----------
    domain : List[tir.PrimExpr]
        The iteration domain, given as a list of expressions.
        NOTE(review): presumably one extent per dimension -- confirm against
        the backend implementation.
    wave_size : tir.PrimExpr
        The wave size.
    index : tir.PrimExpr
        The tile index in one wave.
    group_size : Optional[tir.PrimExpr]
        The group size. Defaults to 8.
    """
    persistent_frame = _ffi_api.Persistent(domain, wave_size, index, group_size)
    return persistent_frame
55+
56+
57+
def Pipelined(
    start: tir.PrimExpr,
    stop: tir.PrimExpr | None = None,
    num_stages: int = 0,
    order: list[int] | None = None,
    stage: list[int] | None = None,
    sync: list[list[int]] | None = None,
    group: list[list[int]] | None = None,
):
    """Tools to construct pipelined for loop.

    Parameters
    ----------
    start : PrimExpr
        The minimum value of iteration. If ``stop`` is omitted, this value is
        used as ``stop`` instead and the iteration starts from 0.
    stop : Optional[PrimExpr]
        The maximum value of iteration.
    num_stages : int
        The max number of buffer used between pipeline producers and consumers.
        if num_stages is 0, pipeline will not be enabled.
    order : Optional[List[int]]
        Forwarded to the backend; ``None`` is replaced by an empty list.
        NOTE(review): presumably the execution order of pipeline statements --
        confirm against the backend implementation.
    stage : Optional[List[int]]
        Forwarded to the backend; ``None`` is replaced by an empty list.
        NOTE(review): presumably the stage assigned to each statement -- confirm.
    sync : Optional[List[List[int]]]
        Forwarded to the backend; ``None`` is replaced by an empty list.
    group : Optional[List[List[int]]]
        Forwarded to the backend; ``None`` is replaced by an empty list.

    Returns
    -------
    res : frame.ForFrame
        The ForFrame.
    """
    if stop is None:
        # Single-bound form: the sole argument is the stop value. The implicit
        # start is a zero of the same dtype when `start` carries one, otherwise
        # a plain Python 0.
        stop = start
        start = IntImm(start.dtype, 0) if hasattr(start, "dtype") else 0
    # The FFI call is always given lists, never None.
    if order is None:
        order = []
    if stage is None:
        stage = []
    if sync is None:
        sync = []
    if group is None:
        group = []
    # The pragmas sit on the call line itself: a `# type: ignore` comment on a
    # line of its own does not suppress errors on the following statement.
    return _ffi_api.Pipelined(start, stop, num_stages, order, stage, sync, group)  # type: ignore[attr-defined] # pylint: disable=no-member
95+
96+
97+
def serial(start: tir.PrimExpr,
           stop: tir.PrimExpr | None = None,
           step: tir.PrimExpr | None = None,
           *,
           annotations: dict[str, Any] | None = None):
    """Construct a serial for loop, optionally with a non-unit step.

    Parameters
    ----------
    start : PrimExpr
        The minimum value of iteration. If ``stop`` is omitted and a non-unit
        ``step`` is given, this value is used as ``stop`` and the iteration
        starts from 0.
    stop : Optional[PrimExpr]
        The maximum value of iteration.
    step : Optional[PrimExpr]
        The loop step. ``None``, the Python integer 1 and an ``IntImm`` whose
        value is 1 all select the builtin TVM script serial loop; any other
        step produces a ``SerialForWithStep``.
    annotations : Optional[Dict[str, Any]]
        Extra loop annotations, forwarded to whichever constructor is used.

    Returns
    -------
    res
        The result of ``tvm.script.ir_builder.tir.serial`` for a unit step,
        otherwise a ``SerialForWithStep`` instance.
    """
    # Decide whether the plain TVM serial loop is sufficient.
    if step is None:
        use_builtin = True
    elif isinstance(step, int):
        use_builtin = step == 1
    elif isinstance(step, IntImm):
        use_builtin = step.value == 1
    else:
        use_builtin = False

    if use_builtin:
        return tb_tir.serial(start, stop, annotations=annotations)

    if stop is None:
        # Single-bound form: the sole bound is the stop value; start becomes a
        # zero of matching dtype when available, otherwise a Python 0.
        zero = IntImm(start.dtype, 0) if hasattr(start, "dtype") else 0
        start, stop = zero, start
    return SerialForWithStep(start, stop, step, annotations=annotations)

tilelang/language/parallel.py

Lines changed: 0 additions & 29 deletions
This file was deleted.

tilelang/language/persistent.py

Lines changed: 0 additions & 27 deletions
This file was deleted.

0 commit comments

Comments
 (0)