Skip to content

Commit 2f1ec08

Browse files
committed
final commit changed documentation added better warnings
Signed-off-by: Onkar Chougule <quic_ochougul@quicinc.com>
1 parent 81cea10 commit 2f1ec08

File tree

2 files changed

+40
-8
lines changed

2 files changed

+40
-8
lines changed

QEfficient/transformers/models/mllama/modeling_mllama.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1127,8 +1127,14 @@ def get_dummy_inputs(self, kv_offload: bool = False):
11271127
vis_cfg = self.config.vision_config
11281128
num_patches = (vis_cfg.image_size // vis_cfg.patch_size) ** 2 + 1
11291129
image_tokens_len = vis_cfg.max_num_tiles * num_patches
1130-
img_size = vis_cfg.get("image_size", 448)
1131-
max_num_img_tiles = vis_cfg.get("max_num_tiles", 4)
1130+
1131+
if vis_cfg := getattr(self.config, "vision_config", None):
1132+
img_size = getattr(vis_cfg, "image_size", 448)
1133+
max_num_img_tiles = getattr(vis_cfg, "max_num_tiles", 4)
1134+
else:
1135+
img_size = 448
1136+
max_num_img_tiles = 4
1137+
11321138
# vision inputs
11331139
vision_inputs = {
11341140
"pixel_values": torch.zeros(

QEfficient/transformers/models/modeling_auto.py

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
130130
This method serves as the easiest entry point into using QEfficient. The interface is designed to be similar to transformers.AutoModel.
131131
Once the model is initialized, you can use other methods such as export, compile, and generate on the same object.
132132
133+
This API can also be used as an exception path for VLM models: since transformers supports loading InternVLChat models via the AutoModel API, we support them via the AutoModelForCausalLM API.
133134
Args:
134135
:pretrained_name_or_path (str): Model card name from HuggingFace or local path to model directory.
135136
:args, kwargs: Additional arguments to pass to transformers.AutoModel.
@@ -165,6 +166,14 @@ def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
165166
except TypeError:
166167
kwargs.pop("add_pooling_layer", None)
167168
model = cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, *args, **kwargs)
169+
170+
# This is to support models that should be classified into a different auto class but that transformers loads via this class
171+
kv_offload = kwargs.pop("kv_offload", None)
172+
if model.__class__.__name__ in MISCLASSIFIED_CAUSAL_LM_TO_QEFF_AUTO_CLASS_MAP:
173+
return MISCLASSIFIED_CAUSAL_LM_TO_QEFF_AUTO_CLASS_MAP[model.__class__.__name__](
174+
model, kv_offload=kv_offload
175+
)
176+
168177
return cls(model)
169178

170179
@property
@@ -1123,18 +1132,34 @@ class QEFFAutoModelForImageTextToText:
11231132

11241133
_hf_auto_class = AutoModelForImageTextToText
11251134

1126-
def __new__(self, model: nn.Module, kv_offload=False, **kwargs):
1135+
def __new__(self, model: nn.Module, kv_offload: Optional[bool] = None, **kwargs):
11271136
if model.config.architectures[0] in MODELS_WITH_ACCURACY_ISSUE_FOR_MXFP6 and not kv_offload:
1128-
logger.warning(f"Advised to use kv_offload=True for {model.__class__.__name__}")
1137+
# For models with mxfp6 accuracy issue, we will use kv_offload=True by default
1138+
if kv_offload is None:
1139+
kv_offload = True
1140+
else:
1141+
logger.warning(f"Advised to use kv_offload=True for {model.__class__.__name__}")
1142+
elif kv_offload is None:
1143+
kv_offload = False
11291144

1145+
print(f"{kv_offload}")
11301146
if kv_offload:
11311147
return _QEffAutoModelForImageTextToTextDuaSingleQPC(model, **kwargs)
11321148
else:
11331149
return _QEFFAutoModelForImageTextToText1QPC(model, **kwargs)
11341150

11351151
@classmethod
11361152
@with_replaced_quantizers
1137-
def from_pretrained(cls, pretrained_model_name_or_path, kv_offload=False, **kwargs):
1153+
def from_pretrained(cls, pretrained_model_name_or_path: str, kv_offload: Optional[bool] = None, **kwargs):
1154+
"""Used to load models supported by transformers.AutoModelForImageTextToText for Cloud AI 100.
1155+
1156+
Args:
1157+
pretrained_model_name_or_path (str): Path or model card name on HuggingFace
1158+
kv_offload (Optional[bool], optional): Should the KV of vision encoder be offloaded to CPU and use Two QPC. Defaults to None.
1159+
1160+
Returns:
1161+
QEFFAutoModelForImageTextToText: The initialized model object — a dual-QPC variant when ``kv_offload`` resolves to True, otherwise a single-QPC variant.
1162+
"""
11381163
# TODO: add a check to see if kv_offload is allowed for given model by loading the config and checking architecture or type of config here.
11391164
if kwargs.get("attn_implementation", None) not in {None, "eager"}:
11401165
logger.warning('Updating attn_implementation="eager"')
@@ -1228,6 +1253,7 @@ def from_pretrained(
12281253
This method serves as the easiest entry point into using QEfficient. The interface is designed to be similar to transformers.AutoModelForCausalLM.
12291254
Once the model is initialized, you can use other methods such as export, compile, and generate on the same object.
12301255
1256+
This API can also be used as an exception path for VLM models: since transformers supports loading InternVLChat models via the AutoModel API, we support them via the AutoModelForCausalLM API.
12311257
Args:
12321258
:pretrained_name_or_path (str): Model card name from HuggingFace or local path to model directory.
12331259
:continuous_batching (bool): Whether this model will be used for continuous batching in future. If this is not set True here, the model can not be exported/compiled for continuous batching later.
@@ -1263,13 +1289,13 @@ def from_pretrained(
12631289
logger.warning("Updating low_cpu_mem_usage=False")
12641290

12651291
kwargs.update({"attn_implementation": "eager", "low_cpu_mem_usage": False})
1266-
1267-
kv_offload = kwargs.pop("kv_offload", None)
12681292
model = cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, *args, **kwargs)
12691293

1294+
# This is to support models that should be classified into a different auto class but that transformers loads via this class
1295+
kv_offload = kwargs.pop("kv_offload", None)
12701296
if model.__class__.__name__ in MISCLASSIFIED_CAUSAL_LM_TO_QEFF_AUTO_CLASS_MAP:
12711297
return MISCLASSIFIED_CAUSAL_LM_TO_QEFF_AUTO_CLASS_MAP[model.__class__.__name__](
1272-
model, kv_offload=kv_offload if kv_offload else False
1298+
model, kv_offload=kv_offload
12731299
)
12741300

12751301
return cls(model, is_tlm=is_tlm, continuous_batching=continuous_batching)

0 commit comments

Comments
 (0)