@@ -43,6 +43,7 @@ class AutoRound:
     Attributes:
         model (torch.nn.Module): The loaded PyTorch model in eval mode.
         tokenizer: Tokenizer used to prepare input text for calibration/tuning.
+        platform (str): The platform to load the pretrained model from. Options: ["hf", "model_scope"].
         bits (int): Weight quantization bits.
         group_size (int): Per-group size for weight quantization.
         sym (bool): Whether to use symmetric weight quantization.
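For reference, a minimal usage sketch of the new parameter (the import path and model id are placeholders, not part of this diff; `platform` defaults to `"hf"`, and `"model_scope"` resolves the model id via ModelScope instead of the Hugging Face Hub):

```python
# Minimal sketch, assuming AutoRound is exported at the package root;
# the model id is a placeholder.
from auto_round import AutoRound

ar = AutoRound(
    model="Qwen/Qwen2.5-0.5B-Instruct",  # resolved on the selected platform
    platform="model_scope",              # default is "hf"
    scheme="W4A16",
)
```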
@@ -67,6 +68,7 @@ def __new__(
         cls,
         model: Union[torch.nn.Module, str],
         tokenizer=None,
+        platform: str = "hf",
         scheme: Union[str, dict, QuantizationScheme, AutoScheme] = "W4A16",
         layer_config: dict[str, Union[str, dict, QuantizationScheme]] = None,
         dataset: Union[str, list, tuple, torch.utils.data.DataLoader] = "NeelNanda/pile-10k",
@@ -146,7 +148,7 @@ def __new__(
146148 """
147149 model_cls = []
148150
149- if (extra_config and not extra_config .mllm_config .is_default ()) or is_mllm_model (model ):
151+ if (extra_config and not extra_config .mllm_config .is_default ()) or is_mllm_model (model , platform = platform ):
150152 logger .info ("using MLLM mode for multimodal model." )
151153 model_cls .append (MLLMCompressor )
152154 if extra_config :
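`__new__` now forwards `platform` into `is_mllm_model`, so the multimodal check can resolve a model id against the right hub before inspecting its config. A rough sketch of what such a platform-aware lookup can look like (illustration only, not auto_round's actual implementation; `snapshot_download` assumes the `modelscope` package is installed):

```python
# Illustration only -- not auto_round's is_mllm_model.
from transformers import AutoConfig

def _resolve(model_name_or_path: str, platform: str = "hf") -> str:
    if platform == "model_scope":
        # assumption: modelscope is installed; fetches the repo locally
        from modelscope import snapshot_download
        return snapshot_download(model_name_or_path)
    return model_name_or_path  # Hugging Face ids load directly

def looks_multimodal(model_name_or_path: str, platform: str = "hf") -> bool:
    config = AutoConfig.from_pretrained(_resolve(model_name_or_path, platform))
    # many multimodal configs carry a nested vision sub-config
    return hasattr(config, "vision_config")
```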
@@ -170,6 +172,7 @@ def __new__(
         ar = dynamic_compressor(
             model=model,
             tokenizer=tokenizer,
+            platform=platform,
             scheme=scheme,
             layer_config=layer_config,
             dataset=dataset,
@@ -314,6 +317,7 @@ def __init__(
         self,
         model: Union[torch.nn.Module, str],
         tokenizer=None,
+        platform: str = "hf",
         scheme: Union[str, dict, QuantizationScheme] = "W4A16",
         layer_config: dict[str, Union[str, dict, QuantizationScheme]] = None,
         dataset: Union[str, list, tuple, torch.utils.data.DataLoader] = "NeelNanda/pile-10k",
@@ -331,6 +335,7 @@ def __init__(
         super().__init__(
             model=model,
             tokenizer=tokenizer,
+            platform=platform,
             scheme=scheme,
             layer_config=layer_config,
             dataset=dataset,
@@ -354,6 +359,7 @@ class AutoRoundAdam(AdamCompressor):
     Args:
         model: The PyTorch model to be quantized.
         tokenizer: An optional tokenizer for processing input data.
+        platform (str): The platform to load the pretrained model from. Options: ["hf", "model_scope"].
         scheme (str | dict | QuantizationScheme): A preset scheme that defines the quantization configurations.
         bits (int): Number of bits for quantization (default is 4).
         group_size (int): Size of the quantization group (default is 128).
@@ -413,6 +419,7 @@ def __init__(
         self,
         model: Union[torch.nn.Module, str],
         tokenizer=None,
+        platform: str = "hf",
         scheme: Union[str, dict, QuantizationScheme] = "W4A16",
         layer_config: dict[str, Union[str, dict, QuantizationScheme]] = None,
         dataset: Union[str, list, tuple, torch.utils.data.DataLoader] = "NeelNanda/pile-10k",
@@ -431,6 +438,7 @@ def __init__(
         super().__init__(
             model=model,
             tokenizer=tokenizer,
+            platform=platform,
             scheme=scheme,
             layer_config=layer_config,
             batch_size=batch_size,
@@ -455,6 +463,7 @@ class AutoRoundMLLM(MLLMCompressor):
     Args:
         model: The PyTorch model to be quantized.
         tokenizer: An optional tokenizer for processing input data.
+        platform (str): The platform to load the pretrained model from. Options: ["hf", "model_scope"].
         processor: Any multi-modal model will require an object to encode or
             decode the data that groups several modalities (among text, vision and audio).
         image_processor: Image processor for special models like llava.
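A hypothetical usage sketch for the multimodal path (the model id and processor setup are placeholders; the constructor arguments mirror the signature below):

```python
# Hypothetical usage; the model id is a placeholder.
from transformers import AutoProcessor, AutoTokenizer
from auto_round import AutoRoundMLLM

model_name = "Qwen/Qwen2-VL-2B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
processor = AutoProcessor.from_pretrained(model_name)  # groups text/vision encoding

ar = AutoRoundMLLM(
    model=model_name,
    tokenizer=tokenizer,
    processor=processor,
    platform="hf",  # or "model_scope"
    scheme="W4A16",
)
```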
@@ -513,6 +522,7 @@ def __init__(
         self,
         model: Union[torch.nn.Module, str],
         tokenizer=None,
+        platform: str = "hf",
         processor=None,
         image_processor=None,
         scheme: Union[str, dict, QuantizationScheme] = "W4A16",
@@ -533,6 +543,7 @@ def __init__(
         super().__init__(
             model=model,
             tokenizer=tokenizer,
+            platform=platform,
             processor=processor,
             image_processor=image_processor,
             scheme=scheme,
@@ -559,6 +570,7 @@ class AutoRoundDiffusion(DiffusionCompressor):
     Args:
         model: The PyTorch model to be quantized.
         tokenizer: An optional tokenizer for processing input data; it is not used for diffusion models.
+        platform (str): The platform to load the pretrained model from. Options: ["hf", "model_scope"].
         guidance_scale (float): Controls how much the image generation process follows the text prompt.
             The higher it is, the more closely generation follows the prompt (default is 7.5).
         num_inference_steps (int): The reference number of denoising steps (default is 50).
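Similarly, a sketch for the diffusion path using the two knobs documented above (the pipeline id is a placeholder, and the root-level export of `AutoRoundDiffusion` is an assumption):

```python
# Sketch only; the pipeline id is a placeholder.
from auto_round import AutoRoundDiffusion

ar = AutoRoundDiffusion(
    model="stabilityai/stable-diffusion-2-1",
    platform="hf",            # or "model_scope"
    scheme="W4A16",
    guidance_scale=7.5,       # how strongly generation follows the prompt
    num_inference_steps=50,   # denoising steps used during calibration
)
```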