Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 75 additions & 33 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -1,76 +1,118 @@
name: CI

on: [pull_request]

env:
PYTHON_VERSION: '3.12'
VENV_DIR: tilelang_ci

jobs:
format-check:
runs-on: self-hosted

permissions:
contents: write

steps:
- name: Checkout repository
uses: actions/checkout@v2
uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: '3.9'
python-version: ${{ env.PYTHON_VERSION }}

- name: Create virtual environment
run: python -m venv tilelang_ci

- name: Activate virtual environment and install dependencies
- name: Ensure venv (local & persistent)
run: |
source tilelang_ci/bin/activate
python -m pip install --upgrade pip
if [ -f requirements-dev.txt ]; then python -m pip install -r requirements-dev.txt; fi

- name: Update submodules recursively
run: git submodule update --init --recursive
set -e
REQS_HASH=$(cat requirements-test.txt 2>/dev/null || true)
MARKER="${{ runner.tool_cache }}/.venv_marker_${{ env.PYTHON_VERSION }}_${REQS_HASH:0:8}"

if [[ -f "$MARKER" ]] && [[ -f "${{ runner.tool_cache }}/${{ env.VENV_DIR }}/bin/activate" ]]; then
echo "venv exists and hash matches – reuse it"
else
echo "venv stale or missing – recreating"
rm -rf "${{ runner.tool_cache }}/${{ env.VENV_DIR }}" "$MARKER"
python -m venv "${{ runner.tool_cache }}/${{ env.VENV_DIR }}"
# shellcheck source=/dev/null
source "${{ runner.tool_cache }}/${{ env.VENV_DIR }}/bin/activate"
python -m pip install --upgrade pip --no-user
[[ -f requirements-test.txt ]] && \
PIP_NO_BUILD_ISOLATION=1 pip install -r requirements-test.txt --no-user
touch "$MARKER"
fi

- name: Run format check
run: |
source tilelang_ci/bin/activate
./format.sh
source "${{ runner.tool_cache }}/${{ env.VENV_DIR }}/bin/activate"
if ! output=$(./format.sh 2>&1); then
echo "------------------------------------"
echo "message:"
echo "$output"
printf '%s\n' "$output" | grep "Please review and stage the changes."
echo "------------------------------------"
exit 1
fi

- name: Commit and Push Changes
uses: stefanzweifel/git-auto-commit-action@v5
with:
commit_message: "lint"

build-test:
runs-on: self-hosted
needs: format-check

permissions:
contents: read
steps:
- name: Checkout repository
uses: actions/checkout@v2
uses: actions/checkout@v4
with:
fetch-depth: 0
repository: ${{ github.event.pull_request.head.repo.full_name }}
ref: ${{ github.event.pull_request.head.ref }}

- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: '3.9'
python-version: ${{ env.PYTHON_VERSION }}

- name: Create virtual environment
run: python -m venv tilelang_ci

- name: Activate virtual environment and install dependencies
- name: Ensure venv (local & persistent)
run: |
source tilelang_ci/bin/activate
python -m pip install --upgrade pip
if [ -f requirements-test.txt ]; then PIP_NO_BUILD_ISOLATION=1 python -m pip install -r requirements-test.txt; fi

- name: Install project in wheel mode
set -e
REQS_HASH=$(cat requirements-test.txt 2>/dev/null || true)
MARKER="${{ runner.tool_cache }}/.venv_marker_${{ env.PYTHON_VERSION }}_${REQS_HASH:0:8}"

if [[ -f "$MARKER" ]] && [[ -f "${{ runner.tool_cache }}/${{ env.VENV_DIR }}/bin/activate" ]]; then
echo "venv exists and hash matches – reuse it"
else
echo "venv stale or missing – recreating"
rm -rf "${{ runner.tool_cache }}/${{ env.VENV_DIR }}" "$MARKER"
python -m venv "${{ runner.tool_cache }}/${{ env.VENV_DIR }}"
source "${{ runner.tool_cache }}/${{ env.VENV_DIR }}/bin/activate"
python -m pip install --upgrade pip --no-user
[[ -f requirements-test.txt ]] && \
PIP_NO_BUILD_ISOLATION=1 pip install -r requirements-test.txt --no-user
pip install . --no-user
touch "$MARKER"
fi

- name: Install project (wheel form)
run: |
source tilelang_ci/bin/activate
python -m pip install .
source "${{ runner.tool_cache }}/${{ env.VENV_DIR }}/bin/activate"
pip install . --no-user

- name: Run examples
run: |
source tilelang_ci/bin/activate
source "${{ runner.tool_cache }}/${{ env.VENV_DIR }}/bin/activate"
cd examples
python -m pytest **/test*.py
unset PYTHONPATH
python -m pytest -n 8 **/test*.py

- name: Run tests
run: |
source tilelang_ci/bin/activate
source "${{ runner.tool_cache }}/${{ env.VENV_DIR }}/bin/activate"
cd testing/python
python -m pytest
unset PYTHONPATH
python -m pytest -n 8
8 changes: 4 additions & 4 deletions src/tl_templates/cuda/threadblock_swizzle.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

namespace tl {

template <int panel_width> TL_DEVICE dim3 rasterization2DRow() {
template <int panel_width, int offset> TL_DEVICE dim3 rasterization2DRow() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Using int for the offset template parameter allows negative values. A negative offset can lead to incorrect behavior with the modulo operator (%) on line 21. In C++, the result of a modulo operation with a negative operand (e.g., -4 % 10) is negative (-4), which would be an invalid index and could lead to out-of-bounds memory access.

To ensure correctness, the offset should be non-negative. I recommend changing the type to unsigned int, which enforces this constraint at compile time.

template <int panel_width, unsigned int offset> TL_DEVICE dim3 rasterization2DRow() {

const unsigned int block_idx = blockIdx.x + blockIdx.y * gridDim.x;
const unsigned int grid_size = gridDim.x * gridDim.y;
const unsigned int panel_size = panel_width * gridDim.x;
Expand All @@ -18,11 +18,11 @@ template <int panel_width> TL_DEVICE dim3 rasterization2DRow() {
const unsigned int col_idx = (panel_idx & 1)
? gridDim.x - 1 - panel_offset / stride
: panel_offset / stride;
const unsigned int row_idx = panel_offset % stride + panel_idx * panel_width;
const unsigned int row_idx = (panel_offset % stride + panel_idx * panel_width + offset) % gridDim.y;
return {col_idx, row_idx, blockIdx.z};
}

template <int panel_width> TL_DEVICE dim3 rasterization2DColumn() {
template <int panel_width, int offset> TL_DEVICE dim3 rasterization2DColumn() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Similar to rasterization2DRow, using int for the offset here can lead to incorrect behavior if a negative value is provided. To ensure the offset is always non-negative and prevent potential indexing errors, please change its type to unsigned int.

template <int panel_width, unsigned int offset> TL_DEVICE dim3 rasterization2DColumn() {

const unsigned int block_idx = blockIdx.x + blockIdx.y * gridDim.x;
const unsigned int grid_size = gridDim.x * gridDim.y;
const unsigned int panel_size = panel_width * gridDim.y;
Expand All @@ -36,7 +36,7 @@ template <int panel_width> TL_DEVICE dim3 rasterization2DColumn() {
const unsigned int row_idx = (panel_idx & 1)
? gridDim.y - 1 - panel_offset / stride
: panel_offset / stride;
const unsigned int col_idx = panel_offset % stride + panel_idx * panel_width;
const unsigned int col_idx = (panel_offset % stride + panel_idx * panel_width + offset) % gridDim.x;
return {col_idx, row_idx, blockIdx.z};
}

Expand Down
4 changes: 2 additions & 2 deletions tilelang/language/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,13 @@ def symbolic(name: str, dtype: str = "int32"):
return tir.Var(name, dtype)


def use_swizzle(panel_size: int, order: str = "row", enable: bool = True):
def use_swizzle(panel_size: int, order: str = "row", offset: int = 0, enable: bool = True):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The offset parameter is passed to a C++ template. As noted in my other comments, this offset must be non-negative to ensure correctness. While my suggestion to change the C++ template parameter to unsigned int will enforce this at compile-time, it's good practice to also add a runtime check in this Python function. This provides a much clearer and more immediate error message to the user.

Suggested change
def use_swizzle(panel_size: int, order: str = "row", offset: int = 0, enable: bool = True):
if offset < 0:
raise ValueError("offset must be a non-negative integer")

# If order is row, use rasterization2DRow, otherwise use rasterization2DColumn
# The panel size is the number of threads in a warp
# Use to improve the L2 Cache Locality
device_func = ("rasterization2DRow" if order == "row" else "rasterization2DColumn")
return attr(None, "threadblock_swizzle_pattern",
f"tl::{device_func}<{panel_size}>") if enable else None
f"tl::{device_func}<{panel_size}, {offset}>") if enable else None


def annotate_layout(layout_map: Dict):
Expand Down
Loading