24
24
from QEfficient .compile .qnn_compiler import compile as qnn_compile
25
25
from QEfficient .generation .cloud_infer import QAICInferenceSession
26
26
from QEfficient .utils import constants , dump_qconfig
27
- from QEfficient .utils ._utils import load_json
28
27
from QEfficient .utils .cache import QEFF_HOME , to_hashable
29
28
30
29
logger = logging .getLogger (__name__ )
@@ -248,19 +247,6 @@ def _compile(
248
247
- convert_to_fp16=True -> -convert-to-fp16
249
248
250
249
"""
251
- if enable_qnn :
252
- return self ._qnn_compile (
253
- onnx_path ,
254
- compile_dir ,
255
- specializations = specializations ,
256
- custom_io = custom_io ,
257
- mdp_ts_num_devices = mdp_ts_num_devices ,
258
- num_cores = compiler_options .get ("aic_num_cores" , 16 ),
259
- mxfp6_matmul = compiler_options .get ("mxfp6_matmul" , False ),
260
- mxint8_kv_cache = mxint8_kv_cache ,
261
- qnn_config = qnn_config ,
262
- )
263
-
264
250
if onnx_path is None and self .onnx_path is None :
265
251
self .export ()
266
252
@@ -269,6 +255,22 @@ def _compile(
269
255
qpc_path = compile_dir / "qpc"
270
256
if not onnx_path .is_file ():
271
257
raise FileNotFoundError (f"ONNX file not found at: { onnx_path } " )
258
+
259
+ if enable_qnn :
260
+ self .qpc_path = qnn_compile (
261
+ onnx_path = onnx_path ,
262
+ qpc_base_path = compile_dir ,
263
+ specializations = specializations ,
264
+ custom_io = custom_io ,
265
+ device_group = list (range (mdp_ts_num_devices )),
266
+ num_cores = compiler_options .get ("aic_num_cores" , 16 ),
267
+ mxfp6 = compiler_options .get ("mxfp6_matmul" , False ),
268
+ mxint8 = mxint8_kv_cache ,
269
+ qnn_config = qnn_config ,
270
+ )
271
+
272
+ return self .qpc_path
273
+
272
274
command = constants .COMPILER + [f"-m={ onnx_path } " ]
273
275
if mdp_ts_json_path := compiler_options .pop ("mdp_ts_json_path" , None ):
274
276
mdp_ts_num_devices = None
@@ -363,96 +365,3 @@ def _compile(
363
365
self .qpc_path = qpc_path
364
366
365
367
return qpc_path
366
-
367
- @dump_qconfig
368
- def _qnn_compile (
369
- self ,
370
- onnx_path : Optional [str ] = None ,
371
- compile_dir : Optional [str ] = None ,
372
- * ,
373
- custom_io : Optional [Dict [str , str ]] = None ,
374
- specializations : Optional [List [Dict [str , int ]]] = None ,
375
- mdp_ts_num_devices : int = 1 ,
376
- num_cores : int = 16 ,
377
- mxfp6_matmul : bool = False ,
378
- mxint8_kv_cache : bool = False ,
379
- qnn_config : Optional [str ] = None ,
380
- ) -> str :
381
- """
382
- Interface for QNN compiler
383
-
384
- Args:
385
- :onnx_path (str): Onnx file to compile
386
- :compile_dir (str): Directory path to compile the qpc. A suffix is added to the directory path to avoid reusing same qpc for different parameters.
387
- :custom_io (dict): Custom IO to specify the input and outputs in different formats than default
388
- :specializations (list): List of specializations to compile for
389
- :mdp_ts_num_devices (int): Number of devices to partition to use Multi-Device Partitioning with tensor-slicing.
390
- :num_cores (int): Number of cores used to compile the model.
391
- :mxfp6_matmul (bool, optional): Whether to use ``mxfp6`` compression for weights. ``Defaults to True``.
392
- :mxint8_kv_cache (bool, optional): Whether to use ``mxint8`` compression for KV cache. ``Defaults to False``.
393
- :qnn_config (str): Path of QNN Config parameters file. ``Defaults to None.``
394
- """
395
- if onnx_path is None and self .onnx_path is None :
396
- self .export ()
397
-
398
- onnx_path = Path (onnx_path or self .onnx_path )
399
- compile_dir = Path (compile_dir or onnx_path .parent )
400
- qpc_path = compile_dir / "qpc"
401
- if not onnx_path .is_file ():
402
- raise FileNotFoundError (f"ONNX file not found at: { onnx_path } " )
403
-
404
- compile_hash = hashlib .sha256 (to_hashable ("qnn" ))
405
-
406
- if specializations is not None :
407
- compile_hash .update (to_hashable (specializations ))
408
-
409
- if custom_io is not None :
410
- compile_hash .update (to_hashable (custom_io ))
411
-
412
- if qnn_config is not None :
413
- qnn_config_values = load_json (qnn_config )
414
- compile_hash .update (to_hashable (qnn_config_values ))
415
-
416
- if mdp_ts_num_devices > 1 :
417
- compile_hash .update (to_hashable ({"mdp_ts_num_devices" : mdp_ts_num_devices }))
418
-
419
- compile_hash .update (to_hashable ({"num_cores" : num_cores }))
420
- compile_hash .update (to_hashable ({"mxfp6_matmul" : mxfp6_matmul }))
421
- compile_hash .update (to_hashable ({"mxint8_kv_cache" : mxint8_kv_cache }))
422
-
423
- # Check if already compiled
424
- compile_hash = compile_hash .hexdigest ()[:16 ]
425
- qpc_path = qpc_path .with_name (qpc_path .name + "-" + compile_hash )
426
- if qpc_path .is_dir ():
427
- if (qpc_path / "programqpc.bin" ).is_file ():
428
- self .qpc_path = qpc_path
429
- return qpc_path
430
- # Probably compilation failure last time, delete directory to start over
431
- shutil .rmtree (qpc_path )
432
-
433
- # Write specializations.json file
434
- if specializations is not None :
435
- specializations_json = compile_dir / "specializations.json"
436
- with open (specializations_json , "w" ) as fp :
437
- json .dump (
438
- {"specializations" : [{k : str (v ) for k , v in spec .items ()} for spec in specializations ]},
439
- fp ,
440
- indent = 4 ,
441
- )
442
-
443
- qnn_compile (
444
- onnx_path = onnx_path ,
445
- qpc_base_path = compile_dir ,
446
- num_cores = num_cores ,
447
- device_group = list (range (mdp_ts_num_devices )),
448
- mxfp6 = mxfp6_matmul ,
449
- mxint8 = mxint8_kv_cache ,
450
- qnn_config = qnn_config ,
451
- qnn_binary_dir = qpc_path ,
452
- specializations = specializations ,
453
- custom_io = custom_io ,
454
- )
455
-
456
- self .qpc_path = qpc_path
457
-
458
- return qpc_path
0 commit comments