
Commit d074286

[Chore] fix typos (#719)
* chore: fix typos
* chore: fix ruff
* chore: fix clang-format
1 parent 6545b08 · commit d074286

File tree

21 files changed: 50 additions, 62 deletions


benchmark/matmul/benchmark_matmul.py

Lines changed: 1 addition & 4 deletions
@@ -53,10 +53,7 @@ def get_configs(args, kwargs):
     from tilelang.carver.roller.rasterization import NoRasterization
     import torch

-    if torch.version.hip is not None:
-        arch=CDNA("hip")
-    else:
-        arch = CUDA("cuda")
+    arch = CDNA("cuda") if torch.version.hip is None else CUDA("hip")
     topk = 10

     carve_template = MatmulTemplate(
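For readers unfamiliar with the probe this refactor relies on, here is a minimal standalone sketch (the variable names are illustrative, not part of the commit): torch.version.hip is a ROCm version string on HIP builds of PyTorch and None on CUDA builds, which is what makes the one-line conditional expression possible.

import torch

# torch.version.hip is a version string on ROCm/HIP builds of PyTorch
# and None on CUDA builds, so it can serve as a simple backend probe.
backend = "hip" if torch.version.hip is not None else "cuda"
print(f"Detected backend: {backend}")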

benchmark/matmul/benchmark_matmul_intrinsic.py

Lines changed: 1 addition & 4 deletions
@@ -187,10 +187,7 @@ def get_configs(args, kwargs):
     from tilelang.carver.roller.rasterization import NoRasterization
     import torch

-    if torch.version.hip is not None:
-        arch=CDNA("hip")
-    else:
-        arch = CUDA("cuda")
+    arch = CDNA("cuda") if torch.version.hip is None else CUDA("hip")
     topk = 10

     carve_template = MatmulTemplate(

docs/deeplearning_operators/gemv.md

Lines changed: 1 addition & 1 deletion
@@ -252,7 +252,7 @@ def splitk_gemv_vectorized(
     return main
 ```

-With vectorized read, now the kernel finishs in **~0.0084 ms**, which is getting close to cuBLAS performance.
+With vectorized read, now the kernel finishes in **~0.0084 ms**, which is getting close to cuBLAS performance.


 ## `tvm_thread_allreduce` Instead of `atomicAdd`

examples/analyze/example_conv_analyze.py

Lines changed: 2 additions & 4 deletions
@@ -4,6 +4,7 @@
 from tilelang.carver.arch import CDNA
 from tilelang.layout import make_swizzled_layout
 import torch
+
 N = 64
 C = 256
 H = 512
@@ -95,10 +96,7 @@ def conv(

 def main():
     my_func = kernel(N, C, H, W, F, K, S, D, P, 64, 128, 32, 3, 256)
-    if torch.version.hip is not None:
-        cuda_device=CDNA("hip")
-    else:
-        cuda_device = CUDA("cuda")
+    cuda_device = CDNA("cuda") if torch.version.hip is None else CUDA("hip")
     result = Analyzer.analysis(my_func, cuda_device)
     print(result)
     print(f"Analyzed FLOPs: {result.total_flops}")

examples/analyze/example_gemm_analyze.py

Lines changed: 1 addition & 4 deletions
@@ -49,10 +49,7 @@ def matmul(
 def main():
     my_func = kernel(128, 128, 32, 3, 128, True)

-    if torch.version.hip is not None:
-        cuda_device=CDNA("hip")
-    else:
-        cuda_device = CUDA("cuda")
+    cuda_device = CDNA("cuda") if torch.version.hip is None else CUDA("hip")
     result = Analyzer.analysis(my_func, cuda_device)

     print(f"Analyzed FLOPs: {result.total_flops}")

examples/bitnet-1.58b/modeling_bitnet.py

Lines changed: 1 addition & 1 deletion
@@ -1373,7 +1373,7 @@ def prepare_inputs_for_generation(self,
                     cache_length + input_ids.shape[1] > max_cache_length):
                 attention_mask = attention_mask[:, -max_cache_length:]

-        position_ids = kwargs.get("position_ids", None)
+        position_ids = kwargs.get("position_ids")
         if attention_mask is not None and position_ids is None:
             # create position_ids on the fly for batch generation
             position_ids = attention_mask.long().cumsum(-1) - 1
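The change above relies on a standard Python guarantee: dict.get already returns None when the key is missing and no default is supplied, so passing None explicitly is redundant. A minimal sketch (the kwargs dict below is illustrative):

kwargs = {"attention_mask": None}

# dict.get falls back to None when the key is absent and no default is given,
# so the two calls below are equivalent.
assert kwargs.get("position_ids", None) is None
assert kwargs.get("position_ids") is None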

examples/gemm/example_gemm_autotune.py

Lines changed: 1 addition & 4 deletions
@@ -16,10 +16,7 @@ def ref_program(A, B):

 def get_configs(M, N, K, with_roller=False, topk=20):
     if with_roller:
-        if torch.version.hip is not None:
-            arch=CDNA("hip")
-        else:
-            arch = CUDA("cuda")
+        arch = CDNA("cuda") if torch.version.hip is None else CUDA("hip")
         carve_template = MatmulTemplate(
             M=M,
             N=N,

src/op/gemm_sp.cc

Lines changed: 1 addition & 1 deletion
@@ -230,7 +230,7 @@ Stmt GemmSP::Lower(const LowerArgs &T, arith::Analyzer *analyzer) const {
       << " and " << B.scope();
   ICHECK((E.scope() == "shared" || E.scope() == "shared.dyn"))
       << "Only support shared.dyn scope for E as copy from smem to rmem are "
-         "delegated to cute implemntation, found "
+         "delegated to cute implementation, found "
       << E.scope();
   ss << op_name << "<" << M << ", " << N << ", " << K << ", ";
   ss << warp_m << ", " << warp_n << ", ";

src/target/codegen_cpp.h

Lines changed: 1 addition & 1 deletion
@@ -95,7 +95,7 @@ class CodeGenTileLangCPP : public CodeGenC {
   Array<String> function_names_;
   /*! \brief whether to emit asserts in the resulting C code */
   bool emit_asserts_;
-  /*! \brief whether to emit forwared function declarations in the resulting C
+  /*! \brief whether to emit forward function declarations in the resulting C
   * code */
   bool emit_fwd_func_decl_;

101101

src/target/codegen_webgpu.cc

Lines changed: 3 additions & 3 deletions
@@ -252,9 +252,9 @@ CodeGenTileLangWebGPU::AddFunction(const PrimFunc &f, bool skip_readonly_decl) {
   os_param_access << "]";
   func_info.launch_param_tags.push_back(os_param_access.str());

-  ICHECK(!info.has_block_index_z)
-      << "blockIdx.z is not supported in WebGPU to accomodate large blockIdx.x";
-  // anotate workgroup
+  ICHECK(!info.has_block_index_z) << "blockIdx.z is not supported in WebGPU to "
+                                     "accommodate large blockIdx.x";
+  // annotate workgroup
   this->stream << "@compute @workgroup_size(" << info.workgroup_size[0] << ", "
                << info.workgroup_size[1] << ", " << info.workgroup_size[2]
                << ")\n";
