[CI/Build] Added E2E Test For Compressed Tensors #5839

Merged
2 changes: 2 additions & 0 deletions requirements-test.txt
@@ -14,6 +14,8 @@ peft
requests
ray
sentence-transformers # required for embedding
sparseml==1.8.0 # required for compressed-tensors
compressed-tensors==0.4.0 # required for compressed-tensors

# Benchmarking
aiohttp
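These pins cover the sparseml half of the comparison; the vllm half needs no extra dependency. As a rough sketch of the end-to-end path this PR puts under test (assuming the nm-testing checkpoint used below is reachable, and that vLLM picks up the compressed-tensors scheme from the checkpoint's quantization config):

    # Sketch only: load a compressed-tensors checkpoint with vLLM and
    # generate greedily, mirroring what the new E2E test exercises.
    from vllm import LLM, SamplingParams

    llm = LLM(model="nm-testing/Meta-Llama-3-8B-Instruct-W8-Channel-A8"
                    "-Dynamic-Per-Token-Test")
    outputs = llm.generate(["The capital of France is"],
                           SamplingParams(temperature=0.0, max_tokens=32))
    print(outputs[0].outputs[0].text)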
4 changes: 4 additions & 0 deletions tests/conftest.py
@@ -8,6 +8,7 @@
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from sparseml.transformers import SparseAutoModelForCausalLM
from transformers import (AutoModelForCausalLM, AutoModelForVision2Seq,
                          AutoProcessor, AutoTokenizer, BatchEncoding)

@@ -147,6 +148,7 @@ def __init__(
        model_kwargs: Optional[Dict[str, Any]] = None,
        is_embedding_model: bool = False,
        is_vision_model: bool = False,
        is_sparseml_model: bool = False,
    ) -> None:
        assert dtype in _STR_DTYPE_TO_TORCH_DTYPE
        torch_dtype = _STR_DTYPE_TO_TORCH_DTYPE[dtype]
@@ -164,6 +166,8 @@
        else:
            if is_vision_model:
                auto_cls = AutoModelForVision2Seq
            elif is_sparseml_model:
                auto_cls = SparseAutoModelForCausalLM
            else:
                auto_cls = AutoModelForCausalLM

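For reference, the new is_sparseml_model flag routes model loading through sparseml's drop-in replacement for the transformers auto class. A minimal sketch of that load path, assuming the sparseml==1.8.0 pin above:

    # Minimal sketch: SparseAutoModelForCausalLM mirrors the
    # AutoModelForCausalLM.from_pretrained API but understands the
    # compressed/quantized checkpoints produced by sparseml.
    from sparseml.transformers import SparseAutoModelForCausalLM

    model = SparseAutoModelForCausalLM.from_pretrained(
        "nm-testing/Meta-Llama-3-8B-Instruct-W8-Channel-A8"
        "-Dynamic-Per-Token-Test",
        torch_dtype="auto",
    )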
44 changes: 44 additions & 0 deletions tests/models/test_compressed_tensors.py
@@ -0,0 +1,44 @@
"""Compares vllm vs sparseml for compressed-tensors

Note: vllm and sparseml do not have bitwise correctness,
so in this test, we just confirm that the top selected
tokens of the are in the top 5 selections of each other.
"""

import pytest

from .utils import check_logprobs_close

MODELS = [
    "nm-testing/Meta-Llama-3-8B-Instruct-W8-Channel-A8-Dynamic-Per-Token-Test",
]

MAX_TOKENS = 32
NUM_LOGPROBS = 5


@pytest.mark.parametrize("model_name", MODELS)
def test_models(
    vllm_runner,
    hf_runner,
    example_prompts,
    model_name,
) -> None:
    # Run sparseml.
    with hf_runner(model_name=model_name,
                   is_sparseml_model=True) as sparseml_model:

        sparseml_outputs = sparseml_model.generate_greedy_logprobs_limit(
            example_prompts, MAX_TOKENS, NUM_LOGPROBS)

    # Run vllm.
    with vllm_runner(model_name=model_name) as vllm_model:
        vllm_outputs = vllm_model.generate_greedy_logprobs(
            example_prompts, MAX_TOKENS, NUM_LOGPROBS)

    check_logprobs_close(
        outputs_0_lst=sparseml_outputs,
        outputs_1_lst=vllm_outputs,
        name_0="sparseml",
        name_1="vllm",
    )
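check_logprobs_close lives in tests/models/utils.py; as a reading aid, its core acceptance rule can be sketched roughly as follows (function name and tuple layout here are illustrative assumptions, not the actual helper):

    # Illustrative sketch of the acceptance rule: a greedy-token
    # mismatch is tolerated only if each model's pick is still among
    # the other model's top-k candidates at that position.
    def check_top_k_overlap(outputs_0, outputs_1):
        # Each logprobs[i] is assumed to be a dict holding the top-k
        # (k = NUM_LOGPROBS) token ids at position i.
        for (tokens_0, _, logprobs_0), (tokens_1, _, logprobs_1) in zip(
                outputs_0, outputs_1):
            for i, (tok_0, tok_1) in enumerate(zip(tokens_0, tokens_1)):
                if tok_0 == tok_1:
                    continue  # greedy picks agree; nothing to check
                assert tok_0 in logprobs_1[i], f"position {i}: {tok_0}"
                assert tok_1 in logprobs_0[i], f"position {i}: {tok_1}"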