-
-
Notifications
You must be signed in to change notification settings - Fork 8.4k
[Bugfix][1/n] Fix the speculative decoding test by setting the target dtype #19633
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -57,6 +57,9 @@ | |||||||
|
||||||||
# Skip cuda graph recording for fast test. | ||||||||
"enforce_eager": True, | ||||||||
|
||||||||
# The original model is float32, keep it for numerical stability. | ||||||||
"dtype": "float32", | ||||||||
}]) | ||||||||
@pytest.mark.parametrize( | ||||||||
"per_test_common_llm_kwargs", | ||||||||
|
@@ -139,6 +142,9 @@ def test_spec_decode_e2e_with_detokenization(test_llm_generator, | |||||||
|
||||||||
# Print spec metrics. | ||||||||
"disable_log_stats": False, | ||||||||
|
||||||||
# The original model is float32, keep it for numerical stability. | ||||||||
"dtype": "float32", | ||||||||
Comment on lines
+146
to
+147
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar to the previous comment, this
Suggested change
|
||||||||
}]) | ||||||||
@pytest.mark.parametrize( | ||||||||
"per_test_common_llm_kwargs", | ||||||||
|
@@ -216,6 +222,9 @@ def test_spec_decode_e2e_greedy_correctness_tiny_model_bs1( | |||||||
|
||||||||
# Print spec metrics. | ||||||||
"disable_log_stats": False, | ||||||||
|
||||||||
# The original model is float32, keep it for numerical stability. | ||||||||
"dtype": "float32", | ||||||||
Comment on lines
+226
to
+227
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is another instance of the repeated
Suggested change
|
||||||||
}]) | ||||||||
@pytest.mark.parametrize( | ||||||||
"per_test_common_llm_kwargs", | ||||||||
|
@@ -279,6 +288,9 @@ def test_spec_decode_e2e_greedy_correctness_tiny_model_large_bs( | |||||||
[{ | ||||||||
# Skip cuda graph recording for fast test. | ||||||||
"enforce_eager": True, | ||||||||
|
||||||||
# The original model is float32, keep it for numerical stability. | ||||||||
"dtype": "float32", | ||||||||
}]) | ||||||||
@pytest.mark.parametrize( | ||||||||
"per_test_common_llm_kwargs", | ||||||||
|
@@ -464,6 +476,8 @@ def test_spec_decode_e2e_greedy_correctness_real_model_large_bs( | |||||||
|
||||||||
# Skip cuda graph recording for fast test. | ||||||||
"enforce_eager": True, | ||||||||
# The original model is float32, keep it for numerical stability. | ||||||||
"dtype": "float32", | ||||||||
Comment on lines
+479
to
+480
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The
Suggested change
|
||||||||
}]) | ||||||||
@pytest.mark.parametrize("per_test_common_llm_kwargs", [ | ||||||||
{ | ||||||||
|
@@ -523,6 +537,8 @@ def test_spec_decode_e2e_greedy_correctness_with_preemption( | |||||||
|
||||||||
# Skip cuda graph recording for fast test. | ||||||||
"enforce_eager": True, | ||||||||
# The original model is float32, keep it for numerical stability. | ||||||||
"dtype": "float32", | ||||||||
Comment on lines
+540
to
+541
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||||||||
}]) | ||||||||
@pytest.mark.parametrize( | ||||||||
"per_test_common_llm_kwargs", | ||||||||
|
@@ -589,6 +605,8 @@ def test_spec_decode_different_block_size(vllm_runner, common_llm_kwargs, | |||||||
|
||||||||
# Skip cuda graph recording for fast test. | ||||||||
"enforce_eager": True, | ||||||||
# The original model is float32, keep it for numerical stability. | ||||||||
"dtype": "float32", | ||||||||
Comment on lines
+608
to
+609
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||||||||
}]) | ||||||||
@pytest.mark.parametrize("per_test_common_llm_kwargs", [{}]) | ||||||||
@pytest.mark.parametrize("baseline_llm_kwargs", [{}]) | ||||||||
|
@@ -655,6 +673,8 @@ def test_skip_speculation(vllm_runner, common_llm_kwargs, | |||||||
|
||||||||
# Skip cuda graph recording for fast test. | ||||||||
"enforce_eager": True, | ||||||||
# The original model is float32, keep it for numerical stability. | ||||||||
"dtype": "float32", | ||||||||
Comment on lines
+676
to
+677
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||||||||
}]) | ||||||||
@pytest.mark.parametrize("per_test_common_llm_kwargs", [{}]) | ||||||||
@pytest.mark.parametrize("baseline_llm_kwargs", [{}]) | ||||||||
|
@@ -706,6 +726,8 @@ def test_disable_speculation(vllm_runner, common_llm_kwargs, | |||||||
|
||||||||
# Skip cuda graph recording for fast test. | ||||||||
"enforce_eager": True, | ||||||||
# The original model is float32, keep it for numerical stability. | ||||||||
"dtype": "float32", | ||||||||
Comment on lines
+729
to
+730
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||||||||
}]) | ||||||||
@pytest.mark.parametrize("per_test_common_llm_kwargs", [{}]) | ||||||||
@pytest.mark.parametrize("baseline_llm_kwargs", [{}]) | ||||||||
|
@@ -763,6 +785,8 @@ def test_many_k(vllm_runner, common_llm_kwargs, per_test_common_llm_kwargs, | |||||||
|
||||||||
# Skip cuda graph recording for fast test. | ||||||||
"enforce_eager": True, | ||||||||
# The original model is float32, keep it for numerical stability. | ||||||||
"dtype": "float32", | ||||||||
Comment on lines
+788
to
+789
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the final instance of the repeated
Suggested change
|
||||||||
}]) | ||||||||
@pytest.mark.parametrize("per_test_common_llm_kwargs", [{}]) | ||||||||
@pytest.mark.parametrize("baseline_llm_kwargs", [{}]) | ||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This
dtype
setting and its accompanying comment are repeated in several places throughout this file. To enhance maintainability and adhere to the DRY (Don't Repeat Yourself) principle, consider defining these common keyword arguments in a shared dictionary at the module level. This dictionary can then be spread into each specificcommon_llm_kwargs
dictionary where this setting is needed.For instance, you could define a constant like this at the module scope:
And then utilize it within the
parametrize
decorator as shown in the suggestion below. This approach centralizes the configuration, making future updates simpler and more consistent.