Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Misc] GPTQ Activation Ordering #8135

Merged
merged 11 commits into from
Sep 9, 2024
Prev Previous commit
Next Next commit
support bool
  • Loading branch information
kylesayrs committed Sep 5, 2024
commit 417bb6645a298e3794eac1a612d555218083a2ad
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import re
from enum import Enum
from typing import Any, Dict, Iterable, Optional
from typing import Any, Dict, Iterable, Optional, Union

from pydantic import BaseModel, Field
from pydantic import BaseModel, Field, field_validator
from torch.nn import Module

from vllm.model_executor.layers.quantization.utils.quant_utils import (
Expand Down Expand Up @@ -82,7 +82,7 @@ class QuantizationArgs(BaseModel):
strategy: Optional[QuantizationStrategy] = None
block_structure: Optional[str] = None
dynamic: bool = False
actorder: Optional[ActivationOrdering] = None
actorder: Union[ActivationOrdering, bool, None] = None
observer: str = Field(
default="minmax",
description=("The class to use to compute the quantization param - "
Expand All @@ -95,6 +95,16 @@ class QuantizationArgs(BaseModel):
"Observers constructor excluding quantization range or symmetry"),
)

@field_validator("actorder", mode="before")
def validate_actorder(cls, value) -> Optional[ActivationOrdering]:
    """Normalize raw ``actorder`` config input before field validation.

    Accepts three input forms:
    - bool: ``True`` maps to ``ActivationOrdering.GROUP``, ``False`` to ``None``
    - str: coerced case-insensitively into an ``ActivationOrdering`` member
    - anything else (already-parsed enum or ``None``): passed through unchanged
    """
    if isinstance(value, bool):
        # Legacy boolean form: True selects group ordering, False disables it.
        if value:
            return ActivationOrdering.GROUP
        return None
    if isinstance(value, str):
        # Case-insensitive string lookup into the enum.
        return ActivationOrdering(value.lower())
    return value


def is_activation_quantization_format(format: str) -> bool:
_ACTIVATION_QUANTIZATION_FORMATS = [
Expand Down
Loading