
[CI] change spell checker from codespell to typos #18711

Open: wants to merge 1 commit into main
2 changes: 1 addition & 1 deletion .gitignore
@@ -200,5 +200,5 @@ benchmarks/**/*.json
actionlint
shellcheck*/

# Ingore moe/marlin_moe gen code
# Ignore moe/marlin_moe gen code
csrc/moe/marlin_moe_wna16/kernel_*
8 changes: 3 additions & 5 deletions .pre-commit-config.yaml
@@ -18,12 +18,10 @@ repos:
args: [--output-format, github, --fix]
- id: ruff-format
files: ^(.buildkite|benchmarks|examples)/.*
- repo: https://github.com/codespell-project/codespell
rev: v2.4.1
- repo: https://github.com/crate-ci/typos
rev: v1.32.0
hooks:
- id: codespell
additional_dependencies: ['tomli']
args: ['--toml', 'pyproject.toml']
- id: typos
- repo: https://github.com/PyCQA/isort
rev: 6.0.1
hooks:
6 changes: 3 additions & 3 deletions csrc/cpu/attention.cpp
@@ -137,8 +137,8 @@ FORCE_INLINE std::pair<T, T> reduceSoftmaxAlibi(T* data, const int size,
}

template <typename T>
FORCE_INLINE void reducePartitonSoftmax(const T* max_data, T* sum_data,
const int size) {
FORCE_INLINE void reducePartitionSoftmax(const T* max_data, T* sum_data,
const int size) {
T max = max_data[0];
for (int i = 1; i < size; ++i) {
max = max >= max_data[i] ? max : max_data[i];
@@ -634,7 +634,7 @@ struct paged_attention_v2_impl {

if (partition_num == 1) continue;

reducePartitonSoftmax(
reducePartitionSoftmax(
max_logits + seq_idx * num_heads * max_num_partitions +
head_idx * max_num_partitions,
exp_sums + seq_idx * num_heads * max_num_partitions +
10 changes: 5 additions & 5 deletions csrc/cpu/cpu_types_x86.hpp
@@ -83,7 +83,7 @@ struct FP16Vec16 : public Vec<FP16Vec16> {
explicit FP16Vec16(const void* ptr)
: reg((__m256i)_mm256_loadu_si256((__m256i*)ptr)) {}

// non-temproal load
// non-temporal load
explicit FP16Vec16(bool, void* ptr)
: reg(_mm256_stream_load_si256((__m256i*)ptr)) {}

@@ -120,7 +120,7 @@ struct BF16Vec16 : public Vec<BF16Vec16> {
explicit BF16Vec16(const void* ptr)
: reg((__m256i)_mm256_loadu_si256((__m256i*)ptr)) {}

// non-temproal load
// non-temporal load
explicit BF16Vec16(bool, void* ptr)
: reg(_mm256_stream_load_si256((__m256i*)ptr)) {}

@@ -327,7 +327,7 @@ struct FP32Vec16 : public Vec<FP32Vec16> {
// normal load
explicit FP32Vec16(const float* ptr) : reg(_mm512_loadu_ps(ptr)) {}

// non-temproal load
// non-temporal load
explicit FP32Vec16(bool, void* ptr)
: reg((__m512)_mm512_stream_load_si512(ptr)) {}

@@ -576,7 +576,7 @@ struct INT8Vec64 : public Vec<INT8Vec64> {
// normal load
explicit INT8Vec64(void* ptr) : reg(_mm512_loadu_epi8(ptr)) {}

// non-temproal load
// non-temporal load
explicit INT8Vec64(bool, void* ptr) : reg(_mm512_stream_load_si512(ptr)) {}

void save(void* ptr) const { _mm512_storeu_epi8(ptr, reg); }
@@ -587,7 +587,7 @@ struct INT8Vec64 : public Vec<INT8Vec64> {
_mm512_mask_storeu_epi8(ptr, mask, reg);
}

// non-temproal save
// non-temporal save
void nt_save(int8_t* ptr) { _mm512_stream_si512((__m512i*)ptr, reg); }
};
#endif
16 changes: 8 additions & 8 deletions csrc/moe/moe_permute_unpermute_op.cu
@@ -12,7 +12,7 @@ void moe_permute(
const torch::Tensor& input, // [n_token, hidden]
const torch::Tensor& topk_weights, //[n_token, topk]
torch::Tensor& topk_ids, // [n_token, topk]
const torch::Tensor& token_expert_indicies, // [n_token, topk]
const torch::Tensor& token_expert_indices, // [n_token, topk]
const std::optional<torch::Tensor>& expert_map, // [n_expert]
int64_t n_expert, int64_t n_local_expert, int64_t topk,
const std::optional<int64_t>& align_block_size,
@@ -27,15 +27,15 @@
"expert_first_token_offset must be int64");
TORCH_CHECK(topk_ids.scalar_type() == at::ScalarType::Int,
"topk_ids must be int32");
TORCH_CHECK(token_expert_indicies.scalar_type() == at::ScalarType::Int,
"token_expert_indicies must be int32");
TORCH_CHECK(token_expert_indices.scalar_type() == at::ScalarType::Int,
"token_expert_indices must be int32");
TORCH_CHECK(src_row_id2dst_row_id_map.scalar_type() == at::ScalarType::Int,
"src_row_id2dst_row_id_map must be int32");
TORCH_CHECK(expert_first_token_offset.size(0) == n_local_expert + 1,
"expert_first_token_offset shape != n_local_expert+1")
TORCH_CHECK(
src_row_id2dst_row_id_map.sizes() == token_expert_indicies.sizes(),
"token_expert_indicies shape must be same as src_row_id2dst_row_id_map");
src_row_id2dst_row_id_map.sizes() == token_expert_indices.sizes(),
"token_expert_indices shape must be same as src_row_id2dst_row_id_map");
auto n_token = input.sizes()[0];
auto n_hidden = input.sizes()[1];
auto align_block_size_value =
@@ -71,7 +71,7 @@ void moe_permute(
expert_map_ptr, n_expert, stream);
}
// expert sort topk expert id and scan expert id get expert_first_token_offset
sortAndScanExpert(get_ptr<int>(topk_ids), get_ptr<int>(token_expert_indicies),
sortAndScanExpert(get_ptr<int>(topk_ids), get_ptr<int>(token_expert_indices),
get_ptr<int>(permuted_experts_id),
get_ptr<int>(dst_row_id2src_row_id_map),
get_ptr<int64_t>(expert_first_token_offset), n_token,
@@ -134,7 +134,7 @@ void moe_unpermute(

void moe_permute(const torch::Tensor& input, const torch::Tensor& topk_weights,
torch::Tensor& topk_ids,
const torch::Tensor& token_expert_indicies,
const torch::Tensor& token_expert_indices,
const std::optional<torch::Tensor>& expert_map,
int64_t n_expert, int64_t n_local_expert, int64_t topk,
const std::optional<int64_t>& align_block_size,
@@ -147,7 +147,7 @@ void moe_permute(const torch::Tensor& input, const torch::Tensor& topk_weights,

void moe_unpermute(const torch::Tensor& input,
const torch::Tensor& topk_weights, torch::Tensor& topk_ids,
const torch::Tensor& token_expert_indicies,
const torch::Tensor& token_expert_indices,
const std::optional<torch::Tensor>& expert_map,
int64_t n_expert, int64_t n_local_expert, int64_t topk,
const std::optional<int64_t>& align_block_size,
6 changes: 3 additions & 3 deletions csrc/moe/topk_softmax_kernels.cu
@@ -425,15 +425,15 @@ void topkGatingSoftmaxLauncherHelper(const float* input, const bool* finished, f

#define LAUNCH_SOFTMAX(NUM_EXPERTS, WARPS_PER_TB) \
topkGatingSoftmaxLauncherHelper<NUM_EXPERTS, WARPS_PER_TB>( \
gating_output, nullptr, topk_weights, topk_indicies, \
gating_output, nullptr, topk_weights, topk_indices, \
token_expert_indices, num_tokens, topk, 0, num_experts, \
stream);

template <typename IndType>
void topkGatingSoftmaxKernelLauncher(
const float* gating_output,
float* topk_weights,
IndType* topk_indicies,
IndType* topk_indices,
int* token_expert_indices,
float* softmax_workspace,
const int num_tokens,
@@ -476,7 +476,7 @@ void topkGatingSoftmaxKernelLauncher(
moeSoftmax<TPB><<<num_tokens, TPB, 0, stream>>>(
gating_output, nullptr, softmax_workspace, num_experts);
moeTopK<TPB><<<num_tokens, TPB, 0, stream>>>(
softmax_workspace, nullptr, topk_weights, topk_indicies, token_expert_indices,
softmax_workspace, nullptr, topk_weights, topk_indices, token_expert_indices,
num_experts, topk, 0, num_experts);
}
}
2 changes: 1 addition & 1 deletion csrc/moe/torch_bindings.cpp
@@ -66,7 +66,7 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, m) {

m.def(
"moe_permute(Tensor input, Tensor topk_weight, Tensor! topk_ids,"
"Tensor token_expert_indicies, Tensor? expert_map, int n_expert,"
"Tensor token_expert_indices, Tensor? expert_map, int n_expert,"
"int n_local_expert,"
"int topk, int? align_block_size,Tensor! permuted_input, Tensor! "
"expert_first_token_offset, Tensor! src_row_id2dst_row_id_map, Tensor! "
6 changes: 3 additions & 3 deletions csrc/quantization/machete/machete_mainloop.cuh
@@ -1003,7 +1003,7 @@ struct MacheteCollectiveMma {
static constexpr int A_CPY_VEC =
decltype(max_common_vector(tCsA, tCrA_load)){};

static constexpr int COVERSION_WIDTH =
static constexpr int CONVERSION_WIDTH =
std::min(A_CPY_VEC, int(size<0>(tCrA_mma)));

auto load_A_to_registers = [&](int read_stage) {
@@ -1026,8 +1026,8 @@ struct MacheteCollectiveMma {
// PIPELINED MAIN LOOP
//

auto convert_A = [&, a_vec = Int<COVERSION_WIDTH>{}](int k_block,
int read_stage) {
auto convert_A = [&, a_vec = Int<CONVERSION_WIDTH>{}](int k_block,
int read_stage) {
load_extra_info_to_registers(partitioned_extra_info,
copy_partitions_extra_info, k_block,
read_stage);
14 changes: 7 additions & 7 deletions csrc/rocm/skinny_gemms.cu
@@ -320,7 +320,7 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS)
// Goal is to bring the activation matrix A to the LDS
// and use it across the lifetime of the work group
// TODO: When activation matrix is larger than 64 KB
// then this is not goint to work!
// then this is not going to work!
//----------------------------------------------------
__shared__ scalar_t s[max_lds_len];

@@ -581,7 +581,7 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS)
// Goal is to bring the activation matrix A to the LDS
// and use it across the lifetime of the work group
// TODO: When activation matrix is larger than 64 KB
// then this is not goint to work!
// then this is not going to work!
//----------------------------------------------------
__shared__ scalar_t s[max_lds_len];

@@ -601,7 +601,7 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS)
// int _WvPrGrp = mindiv(N, CuCount * YTILE, WvPrGrp);
uint32_t m = (blockIdx.x * _WvPrGrp + threadIdx.y) * YTILE;

// Check whether there will be fragmenation!
// Check whether there will be fragmentation!
// This will happen only for the last wave!
if (m < M && (m + YTILE) >= M) {
uint32_t startColumn = M - YTILE;
@@ -827,7 +827,7 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS)

m += CuCount * _WvPrGrp * YTILE;

// Check whether there will be fragmenation!
// Check whether there will be fragmentation!
// This will happen only for the last wave!
if (m < M && (m + YTILE) >= M) {
uint32_t startColumn = M - YTILE;
@@ -882,7 +882,7 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS)
// Goal is to bring the activation matrix A to the LDS
// and use it across the lifetime of the work group
// TODO: When activation matrix is larger than 64 KB
// then this is not goint to work!
// then this is not going to work!
//----------------------------------------------------
__shared__ scalar_t s[max_lds_len];

@@ -904,7 +904,7 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS)
//----------------------------------------------------
uint32_t m = (blockIdx.x * _WvPrGrp + threadIdx.y) * YTILE;

// Check whether there will be fragmenation!
// Check whether there will be fragmentation!
// This will happen only for the last wave!
if (m < M && (m + YTILE) >= M) {
uint32_t startColumn = M - YTILE;
@@ -1176,7 +1176,7 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS)
m += CuCount * _WvPrGrp * YTILE;
kBase = 0;

// Check whether there will be fragmenation!
// Check whether there will be fragmentation!
// This will happen only for the last wave!
if (m < M && (m + YTILE) >= M) {
uint32_t startColumn = M - YTILE;
2 changes: 1 addition & 1 deletion csrc/sparse/cutlass/sparse_scaled_mm_c3x.cu
@@ -277,7 +277,7 @@ CompressorResult cutlass_sparse_compress_sm90(torch::Tensor const& a) {
uint32_t const m = 1; // Set M to 1 for compression
uint32_t const n = a.size(1);

// Note: For correctess, the compressed format must be invariant in:
// Note: For correctness, the compressed format must be invariant in:
// - M, the flattened number of tokens
// - Whether output dtype is fp16 or bf16
// - CUTLASS epilogues
4 changes: 0 additions & 4 deletions pyproject.toml
@@ -137,10 +137,6 @@ exclude = [
'vllm/attention/ops/.*\.py$'
]

[tool.codespell]
ignore-words-list = "dout, te, indicies, subtile, ElementE"
skip = "tests/models/fixtures/*,tests/prompts/*,benchmarks/sonnet.txt,tests/lora/data/*,build/*,vllm/third_party/*"

[tool.isort]
skip_glob = [
".buildkite/*",
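
The removed [tool.codespell] table above carried an ignore-words-list and skip globs. typos keeps equivalent settings in its own configuration; below is a minimal sketch of what that could look like, assuming it lives in pyproject.toml under [tool.typos] (typos also reads a standalone _typos.toml). The key names follow the typos documentation; the values are copied from the removed codespell table purely for illustration and are not part of this PR.

# A minimal, hypothetical [tool.typos] sketch; not part of this diff.
[tool.typos.files]
# Same paths the removed codespell "skip" setting excluded.
extend-exclude = [
    "tests/models/fixtures/*",
    "tests/prompts/*",
    "benchmarks/sonnet.txt",
    "tests/lora/data/*",
    "build/*",
    "vllm/third_party/*",
]

[tool.typos.default.extend-words]
# Accept intentional spellings from the old ignore-words-list;
# mapping a word to itself tells typos to leave it alone.
te = "te"
dout = "dout"
subtile = "subtile"

[tool.typos.default.extend-identifiers]
# Identifier-level exception carried over from the old list.
ElementE = "ElementE"

Note that "indicies" from the old ignore-words-list has no counterpart in this sketch: the PR renames the affected identifiers (for example token_expert_indicies to token_expert_indices) instead of excluding the misspelling.
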
4 changes: 2 additions & 2 deletions tests/compile/test_async_tp.py
@@ -222,7 +222,7 @@ def test_async_tp_pass_correctness(
"VLLM_USE_V1": "1",
}

aysnc_tp_args = [
async_tp_args = [
*common_args,
"--tensor-parallel-size",
str(tp_size),
@@ -241,7 +241,7 @@ def test_async_tp_pass_correctness(
]

compare_two_settings(model_id,
aysnc_tp_args,
async_tp_args,
tp_args,
async_tp_env,
tp_env,
4 changes: 2 additions & 2 deletions tests/core/block/e2e/test_correctness.py
@@ -436,8 +436,8 @@ def test_auto_prefix_caching_with_preemption(baseline_llm_generator,
"enable_prefix_caching": True,
}])
@pytest.mark.parametrize("seed", [1])
def test_auto_prefix_caching_after_evition_start(baseline_llm_generator,
test_llm_generator):
def test_auto_prefix_caching_after_eviction_start(baseline_llm_generator,
test_llm_generator):
"""Verify block manager v2 with auto prefix caching could works normal
even when eviction started.
With APC enabled, all blocks are held by native block at the beginning.
6 changes: 3 additions & 3 deletions tests/core/block/e2e/test_correctness_sliding_window.py
@@ -32,8 +32,8 @@
@pytest.mark.parametrize("batch_size", [5])
@pytest.mark.parametrize("seed", [1])
@pytest.mark.parametrize("backend", ["FLASH_ATTN", "FLASHINFER", "XFORMERS"])
def test_sliding_window_retrival(baseline_llm_generator, test_llm_generator,
batch_size, seed, backend, monkeypatch):
def test_sliding_window_retrieval(baseline_llm_generator, test_llm_generator,
batch_size, seed, backend, monkeypatch):
"""
The test does a bunch of assignments "x1 = 10\nx2 = 33\n..." and then
asks for value of one of them (which is outside the sliding window).
@@ -99,7 +99,7 @@ def test_sliding_window_retrival(baseline_llm_generator, test_llm_generator,
def test_sliding_window_chunked_prefill(test_llm_generator, batch_size, seed,
backend, monkeypatch):
"""
This is similar to test_sliding_window_retrival, however, it doesn't
This is similar to test_sliding_window_retrieval, however, it doesn't
compare against the v1 block manager since v1 doesn't support
chunked prefill with sliding window.

4 changes: 2 additions & 2 deletions tests/core/test_scheduler.py
@@ -593,8 +593,8 @@ def cannot_append_second_group(seq_group, num_lookahead_slots):
# should be preempted. 1 will also be preempted.
budget = create_token_budget()
output = scheduler._schedule_running(budget, curr_loras)
remainig_running = scheduler.running
assert len(remainig_running) == 0
remaining_running = scheduler.running
assert len(remaining_running) == 0
assert len(output.decode_seq_groups) == 1
assert len(output.prefill_seq_groups) == 0
assert output.decode_seq_groups[0].seq_group.request_id == "0"
4 changes: 2 additions & 2 deletions tests/entrypoints/openai/test_chat_template.py
@@ -15,7 +15,7 @@
assert chatml_jinja_path.exists()

# Define models, templates, and their corresponding expected outputs
MODEL_TEMPLATE_GENERATON_OUTPUT = [
MODEL_TEMPLATE_GENERATION_OUTPUT = [
("facebook/opt-125m", chatml_jinja_path, True, False, """<|im_start|>user
Hello<|im_end|>
<|im_start|>assistant
@@ -90,7 +90,7 @@ def test_no_load_chat_template_literallike():

@pytest.mark.parametrize(
"model,template,add_generation_prompt,continue_final_message,expected_output",
MODEL_TEMPLATE_GENERATON_OUTPUT)
MODEL_TEMPLATE_GENERATION_OUTPUT)
def test_get_gen_prompt(model, template, add_generation_prompt,
continue_final_message, expected_output):
model_info = HF_EXAMPLE_MODELS.find_hf_info(model)