1616from QEfficient .base .common import AUTO_MODEL_MAP_TO_MODEL_TYPE_MAP , QEFF_MODEL_TYPE , QEFFCommonLoader
1717from QEfficient .base .modeling_qeff import QEFFBaseModel
1818from QEfficient .exporter .export_utils import export_onnx , fix_onnx_fp16 , generate_input_files , run_model_on_ort
19- from QEfficient .lora .auto import QEffAutoLoraModelForCausalLM
2019from QEfficient .transformers .modeling_utils import get_lists_of_cb_qeff_models
2120from QEfficient .transformers .models .modeling_auto import QEFFAutoModelForCausalLM
2221from QEfficient .utils import load_hf_tokenizer
@@ -149,7 +148,6 @@ def convert_to_cloud_kvstyle(
149148 tokenizer : Union [PreTrainedTokenizer , PreTrainedTokenizerFast ],
150149 onnx_dir_path : str ,
151150 seq_len : int ,
152- max_num_adapters : int ,
153151) -> str :
154152 """
155153 API to convert model with kv retention and export to ONNX.
@@ -178,7 +176,7 @@ def convert_to_cloud_kvstyle(
178176
179177 # Decide path for saving exported ONNX files.
180178 model_name = export_kvstyle_transformed_model_to_onnx (
181- model_name , qeff_model .model , tokenizer , onnx_dir_path , seq_len , max_num_adapters
179+ model_name , qeff_model .model , tokenizer , onnx_dir_path , seq_len
182180 ) # type: ignore
183181
184182 # return the model path for automation.
@@ -192,7 +190,6 @@ def export_kvstyle_transformed_model_to_onnx(
192190 onnx_dir_path : str ,
193191 seq_len : int ,
194192 full_batch_size : Optional [int ] = None ,
195- max_num_adapters : Optional [int ] = None ,
196193) -> str :
197194 # Disabling requires_grad on all parameters
198195 for _ , p in enumerate (transformed_model .parameters ()):
@@ -211,7 +208,6 @@ def export_kvstyle_transformed_model_to_onnx(
211208 prompt_len = Constants .PROMPT_LEN ,
212209 ctx_len = seq_len ,
213210 full_batch_size = full_batch_size ,
214- max_num_adapters = max_num_adapters ,
215211 )
216212
217213 inputs = input_handler .prepare_pytorch_inputs ()
@@ -319,7 +315,6 @@ def export_for_cloud(
319315 onnx_dir_path : str ,
320316 seq_length : int = Constants .SEQ_LEN ,
321317 full_batch_size : Optional [int ] = None ,
322- max_num_adapters : Optional [int ] = None ,
323318) -> str :
324319 # Check if model architecture is supported for continuous batching.
325320 if full_batch_size and qeff_model .model .config .architectures [0 ].lower () not in {
@@ -330,18 +325,14 @@ def export_for_cloud(
330325 )
331326
332327 # FIXME: move all this to class instead of here, and just call qeff_model.export here.
333- if (
334- AUTO_MODEL_MAP_TO_MODEL_TYPE_MAP .get (qeff_model .__class__ , None ) == QEFF_MODEL_TYPE .CAUSALLM
335- or qeff_model .__class__ == QEffAutoLoraModelForCausalLM
336- ): # type: ignore
328+ if AUTO_MODEL_MAP_TO_MODEL_TYPE_MAP .get (qeff_model .__class__ , None ) == QEFF_MODEL_TYPE .CAUSALLM : # type: ignore
337329 return export_lm_model_for_cloud (
338330 model_name = model_name ,
339331 qeff_model = qeff_model , # type: ignore
340332 tokenizer = tokenizer ,
341333 onnx_dir_path = onnx_dir_path ,
342334 seq_length = seq_length ,
343335 full_batch_size = full_batch_size ,
344- max_num_adapters = max_num_adapters ,
345336 )
346337 else :
347338 raise NotImplementedError (
@@ -356,7 +347,6 @@ def export_lm_model_for_cloud(
356347 onnx_dir_path : str ,
357348 seq_length : int ,
358349 full_batch_size : Optional [int ] = None ,
359- max_num_adapters : Optional [int ] = None ,
360350) -> str :
361351 if os .path .exists (onnx_dir_path ):
362352 logger .warning (f"Overriding { onnx_dir_path } " )
@@ -385,7 +375,6 @@ def qualcomm_efficient_converter(
385375 kv : bool = True ,
386376 form_factor : str = "cloud" ,
387377 full_batch_size : Optional [int ] = None ,
388- max_num_adapters : Optional [int ] = None ,
389378) -> Tuple [str , str ]:
390379 """
391380 This method is an alias for ``QEfficient.export``.
@@ -461,7 +450,6 @@ def qualcomm_efficient_converter(
461450 onnx_dir_path = onnx_dir_path ,
462451 seq_length = seq_length ,
463452 full_batch_size = full_batch_size ,
464- max_num_adapters = max_num_adapters ,
465453 )
466454 return onnx_dir_path , generated_onnx_model_path
467455 else :
0 commit comments