Commit e86f44c

fix unit test quantlinear name (#138)
PZS-ModelCloud authored Jul 1, 2024
1 parent e161bf1 commit e86f44c
Showing 7 changed files with 11 additions and 11 deletions.
tests/test_lm_head.py (2 changes: 1 addition & 1 deletion)

@@ -35,7 +35,7 @@ def test_load(self):
         model = GPTQModel.from_quantized(self.MODEL_ID, use_safetensors=True, device=self.DEVICE)
 
         # validate lm_head is loaded as quantized layer
-        assert model.model.lm_head.__class__.__name__ == "QuantLinear"
+        assert model.model.lm_head.__class__.__name__ == "ExllamaV2QuantLinear"
 
         res = model.model.generate(
             **inputs, num_beams=1, min_new_tokens=1, max_new_tokens=128, repetition_penalty=1.25

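Since the class name is now backend-specific, the same check can also be written against the imported class instead of a string. A minimal sketch, assuming the ExllamaV2 kernel is the one selected for lm_head on this device (which is what the string comparison above implies):

    from gptqmodel.nn_modules.qlinear.qlinear_exllamav2 import ExllamaV2QuantLinear

    # Same intent as the string-based assert in the hunk above:
    # fails if a different kernel backend was selected for lm_head.
    assert isinstance(model.model.lm_head, ExllamaV2QuantLinear)
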
tests/test_q4_cuda.py (4 changes: 2 additions & 2 deletions)

@@ -9,7 +9,7 @@
 import unittest  # noqa: E402
 
 import torch  # noqa: E402
-from gptqmodel.nn_modules.qlinear.qlinear_cuda_old import QuantLinear as QuantLinearCudaOld  # noqa: E402
+from gptqmodel.nn_modules.qlinear.qlinear_cuda_old import CudaOldQuantLinear  # noqa: E402
 from parameterized import parameterized  # noqa: E402
 
 try:

@@ -561,7 +561,7 @@ def test_cuda_old(self, use_half2: bool):
         device = "cuda"
 
         weight_dtype = torch.float16 if use_half2 else torch.float32
-        linear = QuantLinearCudaOld(
+        linear = CudaOldQuantLinear(
             bits=4,
             group_size=group_size,
             desc_act=False,

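For orientation, a minimal sketch of instantiating the renamed class directly. Only bits, group_size, and desc_act are visible in this hunk; the remaining keyword arguments and the shape values are assumptions patterned on the exllama tests further down, not confirmed by this diff:

    from gptqmodel.nn_modules.qlinear.qlinear_cuda_old import CudaOldQuantLinear

    group_size, k, n = 128, 4096, 4096  # hypothetical shapes for illustration

    linear = CudaOldQuantLinear(
        bits=4,
        group_size=group_size,
        desc_act=False,
        infeatures=k,    # assumed parameter, mirroring outfeatures/bias below
        outfeatures=n,   # assumed, as in the exllama tests
        bias=False,
    )
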
tests/test_q4_exallama.py (4 changes: 2 additions & 2 deletions)

@@ -11,7 +11,7 @@
 import torch  # noqa: E402
 from gptqmodel import GPTQModel, exllama_set_max_input_length  # noqa: E402
 from gptqmodel.models._const import EXLLAMA_DEFAULT_MAX_INPUT_LENGTH  # noqa: E402
-from gptqmodel.nn_modules.qlinear.qlinear_exllama import QuantLinear  # noqa: E402
+from gptqmodel.nn_modules.qlinear.qlinear_exllama import ExllamaQuantLinear  # noqa: E402
 from gptqmodel.quantization import FORMAT
 from gptqmodel.utils.importer import select_quant_linear  # noqa: E402
 from gptqmodel.utils.model import gptqmodel_post_init  # noqa: E402

@@ -1078,7 +1078,7 @@ def test_exllama(self):
             outfeatures=n,
             bias=False,
         )
-        self.assertTrue(isinstance(linear, QuantLinear))
+        self.assertTrue(isinstance(linear, ExllamaQuantLinear))
 
         torch.manual_seed(42)
 
tests/test_q4_exallama_v2.py (4 changes: 2 additions & 2 deletions)

@@ -8,7 +8,7 @@
 
 import torch  # noqa: E402
 from gptqmodel import Backend, GPTQModel  # noqa: E402
-from gptqmodel.nn_modules.qlinear.qlinear_exllamav2 import QuantLinear  # noqa: E402
+from gptqmodel.nn_modules.qlinear.qlinear_exllamav2 import ExllamaV2QuantLinear  # noqa: E402
 from gptqmodel.quantization import FORMAT
 from gptqmodel.utils.importer import select_quant_linear  # noqa: E402
 from gptqmodel.utils.model import gptqmodel_post_init  # noqa: E402

@@ -46,7 +46,7 @@ def test_exllamav2(self):
             bias=False,
         )
 
-        self.assertTrue(isinstance(linear, QuantLinear))
+        self.assertTrue(isinstance(linear, ExllamaV2QuantLinear))
 
         torch.manual_seed(42)
 
tests/test_q4_marlin.py (2 changes: 1 addition & 1 deletion)

@@ -8,7 +8,7 @@
 
 import torch  # noqa: E402
 from gptqmodel import Backend, GPTQModel  # noqa: E402
-from gptqmodel.nn_modules.qlinear.qlinear_marlin import QuantLinear as MarlinQuantLinear  # noqa: E402
+from gptqmodel.nn_modules.qlinear.qlinear_marlin import MarlinQuantLinear  # noqa: E402
 from transformers import AutoTokenizer  # noqa: E402
 
 
tests/test_q4_triton.py (2 changes: 1 addition & 1 deletion)

@@ -8,7 +8,7 @@
 
 import torch  # noqa: E402
 from gptqmodel import Backend, GPTQModel  # noqa: E402
-from gptqmodel.nn_modules.qlinear.qlinear_tritonv2 import QuantLinear as TritonV2QuantLinear  # noqa: E402
+from gptqmodel.nn_modules.qlinear.qlinear_tritonv2 import TritonV2QuantLinear  # noqa: E402
 from transformers import AutoTokenizer  # noqa: E402
 
 GENERATE_EVAL_SIZE = 100

tests/test_repacking.py (4 changes: 2 additions & 2 deletions)

@@ -12,8 +12,8 @@
 import torch.nn as nn  # noqa: E402
 import gptqmodel_marlin_cuda  # noqa: E402
 # isort: on
-from gptqmodel.nn_modules.qlinear.qlinear_cuda_old import QuantLinear as CudaOldQuantLinear  # noqa: E402
-from gptqmodel.nn_modules.qlinear.qlinear_marlin import QuantLinear as MarlinQuantLinear  # noqa: E402
+from gptqmodel.nn_modules.qlinear.qlinear_cuda_old import CudaOldQuantLinear  # noqa: E402
+from gptqmodel.nn_modules.qlinear.qlinear_marlin import MarlinQuantLinear  # noqa: E402
 from gptqmodel.nn_modules.qlinear.qlinear_marlin import _get_perms, dequantize_weight  # noqa: E402
 
 
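Taken together, the commit replaces the ambiguous QuantLinear alias pattern with one explicit class per kernel backend. The five renamed imports, collected from the hunks above:

    from gptqmodel.nn_modules.qlinear.qlinear_cuda_old import CudaOldQuantLinear
    from gptqmodel.nn_modules.qlinear.qlinear_exllama import ExllamaQuantLinear
    from gptqmodel.nn_modules.qlinear.qlinear_exllamav2 import ExllamaV2QuantLinear
    from gptqmodel.nn_modules.qlinear.qlinear_marlin import MarlinQuantLinear
    from gptqmodel.nn_modules.qlinear.qlinear_tritonv2 import TritonV2QuantLinear

With distinct names, the isinstance and __class__.__name__ checks in these tests can no longer mistake one backend's layer for another's.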
