-
Notifications
You must be signed in to change notification settings - Fork 290
[Feat] Introduce an offset option in threadblock swizzle #668
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,76 +1,118 @@ | ||
| name: CI | ||
|
|
||
| on: [pull_request] | ||
|
|
||
| env: | ||
| PYTHON_VERSION: '3.12' | ||
| VENV_DIR: tilelang_ci | ||
|
|
||
| jobs: | ||
| format-check: | ||
| runs-on: self-hosted | ||
|
|
||
| permissions: | ||
| contents: write | ||
|
|
||
| steps: | ||
| - name: Checkout repository | ||
| uses: actions/checkout@v2 | ||
| uses: actions/checkout@v4 | ||
| with: | ||
| fetch-depth: 0 | ||
|
|
||
| - name: Set up Python | ||
| uses: actions/setup-python@v2 | ||
| with: | ||
| python-version: '3.9' | ||
| python-version: ${{ env.PYTHON_VERSION }} | ||
|
|
||
| - name: Create virtual environment | ||
| run: python -m venv tilelang_ci | ||
|
|
||
| - name: Activate virtual environment and install dependencies | ||
| - name: Ensure venv (local & persistent) | ||
| run: | | ||
| source tilelang_ci/bin/activate | ||
| python -m pip install --upgrade pip | ||
| if [ -f requirements-dev.txt ]; then python -m pip install -r requirements-dev.txt; fi | ||
|
|
||
| - name: Update submodules recursively | ||
| run: git submodule update --init --recursive | ||
| set -e | ||
| REQS_HASH=$(cat requirements-test.txt 2>/dev/null || true) | ||
| MARKER="${{ runner.tool_cache }}/.venv_marker_${{ env.PYTHON_VERSION }}_${REQS_HASH:0:8}" | ||
|
|
||
| if [[ -f "$MARKER" ]] && [[ -f "${{ runner.tool_cache }}/${{ env.VENV_DIR }}/bin/activate" ]]; then | ||
| echo "venv exists and hash matches – reuse it" | ||
| else | ||
| echo "venv stale or missing – recreating" | ||
| rm -rf "${{ runner.tool_cache }}/${{ env.VENV_DIR }}" "$MARKER" | ||
| python -m venv "${{ runner.tool_cache }}/${{ env.VENV_DIR }}" | ||
| # shellcheck source=/dev/null | ||
| source "${{ runner.tool_cache }}/${{ env.VENV_DIR }}/bin/activate" | ||
| python -m pip install --upgrade pip --no-user | ||
| [[ -f requirements-test.txt ]] && \ | ||
| PIP_NO_BUILD_ISOLATION=1 pip install -r requirements-test.txt --no-user | ||
| touch "$MARKER" | ||
| fi | ||
|
|
||
| - name: Run format check | ||
| run: | | ||
| source tilelang_ci/bin/activate | ||
| ./format.sh | ||
| source "${{ runner.tool_cache }}/${{ env.VENV_DIR }}/bin/activate" | ||
| if ! output=$(./format.sh 2>&1); then | ||
| echo "------------------------------------" | ||
| echo "message:" | ||
| echo "$output" | ||
| printf '%s\n' "$output" | grep "Please review and stage the changes." | ||
| echo "------------------------------------" | ||
| exit 1 | ||
| fi | ||
|
|
||
| - name: Commit and Push Changes | ||
| uses: stefanzweifel/git-auto-commit-action@v5 | ||
| with: | ||
| commit_message: "lint" | ||
|
|
||
| build-test: | ||
| runs-on: self-hosted | ||
| needs: format-check | ||
|
|
||
| permissions: | ||
| contents: read | ||
| steps: | ||
| - name: Checkout repository | ||
| uses: actions/checkout@v2 | ||
| uses: actions/checkout@v4 | ||
| with: | ||
| fetch-depth: 0 | ||
| repository: ${{ github.event.pull_request.head.repo.full_name }} | ||
| ref: ${{ github.event.pull_request.head.ref }} | ||
|
|
||
| - name: Set up Python | ||
| uses: actions/setup-python@v2 | ||
| with: | ||
| python-version: '3.9' | ||
| python-version: ${{ env.PYTHON_VERSION }} | ||
|
|
||
| - name: Create virtual environment | ||
| run: python -m venv tilelang_ci | ||
|
|
||
| - name: Activate virtual environment and install dependencies | ||
| - name: Ensure venv (local & persistent) | ||
| run: | | ||
| source tilelang_ci/bin/activate | ||
| python -m pip install --upgrade pip | ||
| if [ -f requirements-test.txt ]; then PIP_NO_BUILD_ISOLATION=1 python -m pip install -r requirements-test.txt; fi | ||
|
|
||
| - name: Install project in wheel mode | ||
| set -e | ||
| REQS_HASH=$(cat requirements-test.txt 2>/dev/null || true) | ||
| MARKER="${{ runner.tool_cache }}/.venv_marker_${{ env.PYTHON_VERSION }}_${REQS_HASH:0:8}" | ||
|
|
||
| if [[ -f "$MARKER" ]] && [[ -f "${{ runner.tool_cache }}/${{ env.VENV_DIR }}/bin/activate" ]]; then | ||
| echo "venv exists and hash matches – reuse it" | ||
| else | ||
| echo "venv stale or missing – recreating" | ||
| rm -rf "${{ runner.tool_cache }}/${{ env.VENV_DIR }}" "$MARKER" | ||
| python -m venv "${{ runner.tool_cache }}/${{ env.VENV_DIR }}" | ||
| source "${{ runner.tool_cache }}/${{ env.VENV_DIR }}/bin/activate" | ||
| python -m pip install --upgrade pip --no-user | ||
| [[ -f requirements-test.txt ]] && \ | ||
| PIP_NO_BUILD_ISOLATION=1 pip install -r requirements-test.txt --no-user | ||
| pip install . --no-user | ||
| touch "$MARKER" | ||
| fi | ||
|
|
||
| - name: Install project (wheel form) | ||
| run: | | ||
| source tilelang_ci/bin/activate | ||
| python -m pip install . | ||
| source "${{ runner.tool_cache }}/${{ env.VENV_DIR }}/bin/activate" | ||
| pip install . --no-user | ||
|
|
||
| - name: Run examples | ||
| run: | | ||
| source tilelang_ci/bin/activate | ||
| source "${{ runner.tool_cache }}/${{ env.VENV_DIR }}/bin/activate" | ||
| cd examples | ||
| python -m pytest **/test*.py | ||
| unset PYTHONPATH | ||
| python -m pytest -n 8 **/test*.py | ||
|
|
||
| - name: Run tests | ||
| run: | | ||
| source tilelang_ci/bin/activate | ||
| source "${{ runner.tool_cache }}/${{ env.VENV_DIR }}/bin/activate" | ||
| cd testing/python | ||
| python -m pytest | ||
| unset PYTHONPATH | ||
| python -m pytest -n 8 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,7 +4,7 @@ | |
|
|
||
| namespace tl { | ||
|
|
||
| template <int panel_width> TL_DEVICE dim3 rasterization2DRow() { | ||
| template <int panel_width, int offset> TL_DEVICE dim3 rasterization2DRow() { | ||
| const unsigned int block_idx = blockIdx.x + blockIdx.y * gridDim.x; | ||
| const unsigned int grid_size = gridDim.x * gridDim.y; | ||
| const unsigned int panel_size = panel_width * gridDim.x; | ||
|
|
@@ -18,11 +18,11 @@ template <int panel_width> TL_DEVICE dim3 rasterization2DRow() { | |
| const unsigned int col_idx = (panel_idx & 1) | ||
| ? gridDim.x - 1 - panel_offset / stride | ||
| : panel_offset / stride; | ||
| const unsigned int row_idx = panel_offset % stride + panel_idx * panel_width; | ||
| const unsigned int row_idx = (panel_offset % stride + panel_idx * panel_width + offset) % gridDim.y; | ||
| return {col_idx, row_idx, blockIdx.z}; | ||
| } | ||
|
|
||
| template <int panel_width> TL_DEVICE dim3 rasterization2DColumn() { | ||
| template <int panel_width, int offset> TL_DEVICE dim3 rasterization2DColumn() { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar to template <int panel_width, unsigned int offset> TL_DEVICE dim3 rasterization2DColumn() { |
||
| const unsigned int block_idx = blockIdx.x + blockIdx.y * gridDim.x; | ||
| const unsigned int grid_size = gridDim.x * gridDim.y; | ||
| const unsigned int panel_size = panel_width * gridDim.y; | ||
|
|
@@ -36,7 +36,7 @@ template <int panel_width> TL_DEVICE dim3 rasterization2DColumn() { | |
| const unsigned int row_idx = (panel_idx & 1) | ||
| ? gridDim.y - 1 - panel_offset / stride | ||
| : panel_offset / stride; | ||
| const unsigned int col_idx = panel_offset % stride + panel_idx * panel_width; | ||
| const unsigned int col_idx = (panel_offset % stride + panel_idx * panel_width + offset) % gridDim.x; | ||
| return {col_idx, row_idx, blockIdx.z}; | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -76,13 +76,13 @@ def symbolic(name: str, dtype: str = "int32"): | |||||||
| return tir.Var(name, dtype) | ||||||||
|
|
||||||||
|
|
||||||||
| def use_swizzle(panel_size: int, order: str = "row", enable: bool = True): | ||||||||
| def use_swizzle(panel_size: int, order: str = "row", offset: int = 0, enable: bool = True): | ||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The
Suggested change
|
||||||||
| # If order is row, use rasterization2DRow, otherwise use rasterization2DColumn | ||||||||
| # The panel size is the number of threads in a warp | ||||||||
| # Use to improve the L2 Cache Locality | ||||||||
| device_func = ("rasterization2DRow" if order == "row" else "rasterization2DColumn") | ||||||||
| return attr(None, "threadblock_swizzle_pattern", | ||||||||
| f"tl::{device_func}<{panel_size}>") if enable else None | ||||||||
| f"tl::{device_func}<{panel_size}, {offset}>") if enable else None | ||||||||
|
|
||||||||
|
|
||||||||
| def annotate_layout(layout_map: Dict): | ||||||||
|
|
||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using
intfor theoffsettemplate parameter allows negative values. A negative offset can lead to incorrect behavior with the modulo operator (%) on line 21. In C++, the result of a modulo operation with a negative operand (e.g.,-4 % 10) is negative (-4), which would be an invalid index and could lead to out-of-bounds memory access.To ensure correctness, the offset should be non-negative. I recommend changing the type to
unsigned int, which enforces this constraint at compile time.