Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 7 additions & 9 deletions src/compressed_tensors/quantization/lifecycle/initialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,15 +249,13 @@ def initialize_qparams(

# 2. Identify quantization scale and zp dtype
scale_dtype = observed_dtype
if quantization_args.scale_dtype is None:
if scale_dtype not in [
torch.float16,
torch.bfloat16,
torch.float32,
torch.float64,
]:
scale_dtype = torch.float16
quantization_args.scale_dtype = scale_dtype
if scale_dtype not in [
torch.float16,
torch.bfloat16,
torch.float32,
torch.float64,
]:
scale_dtype = torch.float16

# 3. Initializes scale/zp for the module
init_scale = Parameter(
Expand Down
15 changes: 14 additions & 1 deletion src/compressed_tensors/quantization/quant_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,14 @@
from compressed_tensors.utils import Aliasable
from compressed_tensors.utils.helpers import deprecated
from compressed_tensors.utils.type import TorchDtype
from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
from pydantic import (
BaseModel,
ConfigDict,
Field,
field_serializer,
field_validator,
model_validator,
)


__all__ = [
Expand Down Expand Up @@ -193,6 +200,12 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
),
)

@field_serializer("zp_dtype")
def serialize_dtype(self, dtype: torch.dtype):
    """Serialize the ``zp_dtype`` field.

    Returns ``None`` when quantization is symmetric (the zero-point field
    is omitted from the serialized config — presumably because symmetric
    schemes imply zero-valued zero-points; confirm against the loader),
    otherwise the dtype's string form (e.g. ``"torch.int8"``).
    """
    return None if self.symmetric else str(dtype)

@field_validator("type", mode="before")
def validate_type(cls, value) -> QuantizationType:
if isinstance(value, str):
Expand Down