Skip to content

Commit 81f3ecb

Browse files
author
Shubham Agrawal
committed
Moved _qnn_compile functionality to qnn_compiler.py
Signed-off-by: Shubham Agrawal <quic_shubhagr@quicinc.com>
1 parent f4a9319 commit 81f3ecb

File tree

3 files changed

+61
-107
lines changed

3 files changed

+61
-107
lines changed

QEfficient/base/modeling_qeff.py

Lines changed: 16 additions & 107 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,6 @@
2424
from QEfficient.compile.qnn_compiler import compile as qnn_compile
2525
from QEfficient.generation.cloud_infer import QAICInferenceSession
2626
from QEfficient.utils import constants, dump_qconfig
27-
from QEfficient.utils._utils import load_json
2827
from QEfficient.utils.cache import QEFF_HOME, to_hashable
2928

3029
logger = logging.getLogger(__name__)
@@ -248,19 +247,6 @@ def _compile(
248247
- convert_to_fp16=True -> -convert-to-fp16
249248
250249
"""
251-
if enable_qnn:
252-
return self._qnn_compile(
253-
onnx_path,
254-
compile_dir,
255-
specializations=specializations,
256-
custom_io=custom_io,
257-
mdp_ts_num_devices=mdp_ts_num_devices,
258-
num_cores=compiler_options.get("aic_num_cores", 16),
259-
mxfp6_matmul=compiler_options.get("mxfp6_matmul", False),
260-
mxint8_kv_cache=mxint8_kv_cache,
261-
qnn_config=qnn_config,
262-
)
263-
264250
if onnx_path is None and self.onnx_path is None:
265251
self.export()
266252

@@ -269,6 +255,22 @@ def _compile(
269255
qpc_path = compile_dir / "qpc"
270256
if not onnx_path.is_file():
271257
raise FileNotFoundError(f"ONNX file not found at: {onnx_path}")
258+
259+
if enable_qnn:
260+
self.qpc_path = qnn_compile(
261+
onnx_path=onnx_path,
262+
qpc_base_path=compile_dir,
263+
specializations=specializations,
264+
custom_io=custom_io,
265+
device_group=list(range(mdp_ts_num_devices)),
266+
num_cores=compiler_options.get("aic_num_cores", 16),
267+
mxfp6=compiler_options.get("mxfp6_matmul", False),
268+
mxint8=mxint8_kv_cache,
269+
qnn_config=qnn_config,
270+
)
271+
272+
return self.qpc_path
273+
272274
command = constants.COMPILER + [f"-m={onnx_path}"]
273275
if mdp_ts_json_path := compiler_options.pop("mdp_ts_json_path", None):
274276
mdp_ts_num_devices = None
@@ -363,96 +365,3 @@ def _compile(
363365
self.qpc_path = qpc_path
364366

365367
return qpc_path
366-
367-
@dump_qconfig
368-
def _qnn_compile(
369-
self,
370-
onnx_path: Optional[str] = None,
371-
compile_dir: Optional[str] = None,
372-
*,
373-
custom_io: Optional[Dict[str, str]] = None,
374-
specializations: Optional[List[Dict[str, int]]] = None,
375-
mdp_ts_num_devices: int = 1,
376-
num_cores: int = 16,
377-
mxfp6_matmul: bool = False,
378-
mxint8_kv_cache: bool = False,
379-
qnn_config: Optional[str] = None,
380-
) -> str:
381-
"""
382-
Interface for QNN compiler
383-
384-
Args:
385-
:onnx_path (str): Onnx file to compile
386-
:compile_dir (str): Directory path to compile the qpc. A suffix is added to the directory path to avoid reusing same qpc for different parameters.
387-
:custom_io (dict): Custom IO to specify the input and outputs in different formats than default
388-
:specializations (list): List of specializations to compile for
389-
:mdp_ts_num_devices (int): Number of devices to partition to use Multi-Device Partitioning with tensor-slicing.
390-
:num_cores (int): Number of cores used to compile the model.
391-
:mxfp6_matmul (bool, optional): Whether to use ``mxfp6`` compression for weights. ``Defaults to True``.
392-
:mxint8_kv_cache (bool, optional): Whether to use ``mxint8`` compression for KV cache. ``Defaults to False``.
393-
:qnn_config (str): Path of QNN Config parameters file. ``Defaults to None.``
394-
"""
395-
if onnx_path is None and self.onnx_path is None:
396-
self.export()
397-
398-
onnx_path = Path(onnx_path or self.onnx_path)
399-
compile_dir = Path(compile_dir or onnx_path.parent)
400-
qpc_path = compile_dir / "qpc"
401-
if not onnx_path.is_file():
402-
raise FileNotFoundError(f"ONNX file not found at: {onnx_path}")
403-
404-
compile_hash = hashlib.sha256(to_hashable("qnn"))
405-
406-
if specializations is not None:
407-
compile_hash.update(to_hashable(specializations))
408-
409-
if custom_io is not None:
410-
compile_hash.update(to_hashable(custom_io))
411-
412-
if qnn_config is not None:
413-
qnn_config_values = load_json(qnn_config)
414-
compile_hash.update(to_hashable(qnn_config_values))
415-
416-
if mdp_ts_num_devices > 1:
417-
compile_hash.update(to_hashable({"mdp_ts_num_devices": mdp_ts_num_devices}))
418-
419-
compile_hash.update(to_hashable({"num_cores": num_cores}))
420-
compile_hash.update(to_hashable({"mxfp6_matmul": mxfp6_matmul}))
421-
compile_hash.update(to_hashable({"mxint8_kv_cache": mxint8_kv_cache}))
422-
423-
# Check if already compiled
424-
compile_hash = compile_hash.hexdigest()[:16]
425-
qpc_path = qpc_path.with_name(qpc_path.name + "-" + compile_hash)
426-
if qpc_path.is_dir():
427-
if (qpc_path / "programqpc.bin").is_file():
428-
self.qpc_path = qpc_path
429-
return qpc_path
430-
# Probably compilation failure last time, delete directory to start over
431-
shutil.rmtree(qpc_path)
432-
433-
# Write specializations.json file
434-
if specializations is not None:
435-
specializations_json = compile_dir / "specializations.json"
436-
with open(specializations_json, "w") as fp:
437-
json.dump(
438-
{"specializations": [{k: str(v) for k, v in spec.items()} for spec in specializations]},
439-
fp,
440-
indent=4,
441-
)
442-
443-
qnn_compile(
444-
onnx_path=onnx_path,
445-
qpc_base_path=compile_dir,
446-
num_cores=num_cores,
447-
device_group=list(range(mdp_ts_num_devices)),
448-
mxfp6=mxfp6_matmul,
449-
mxint8=mxint8_kv_cache,
450-
qnn_config=qnn_config,
451-
qnn_binary_dir=qpc_path,
452-
specializations=specializations,
453-
custom_io=custom_io,
454-
)
455-
456-
self.qpc_path = qpc_path
457-
458-
return qpc_path

QEfficient/compile/compile_helper.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -196,6 +196,7 @@ def compile(
196196
qpc_path = qnn_compile(
197197
onnx_path=onnx_path,
198198
qpc_base_path=qpc_path,
199+
qnn_binary_dir=os.path.join(qpc_path, "qpcs"),
199200
num_cores=num_cores,
200201
mxfp6=mxfp6,
201202
mxint8=mxint8,

QEfficient/compile/qnn_compiler.py

Lines changed: 44 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,11 +5,14 @@
55
#
66
# -----------------------------------------------------------------------------
77

8+
import hashlib
9+
import json
810
import os
911
import shutil
1012
from typing import Dict, List, Optional
1113

1214
from QEfficient.utils._utils import create_json, execute_command, load_json
15+
from QEfficient.utils.cache import to_hashable
1316
from QEfficient.utils.constants import QnnConstants
1417
from QEfficient.utils.generate_qnn_network_specialization_config import (
1518
generate_data_format_config,
@@ -384,6 +387,47 @@ def compile(
384387

385388
prefill_only = True if len(specializations) == 1 else False
386389

390+
if qnn_binary_dir is None:
391+
compile_hash = hashlib.sha256(to_hashable("qnn"))
392+
393+
if specializations is not None:
394+
compile_hash.update(to_hashable(specializations))
395+
396+
if custom_io is not None:
397+
compile_hash.update(to_hashable(custom_io))
398+
399+
if qnn_config is not None:
400+
qnn_config_values = load_json(qnn_config)
401+
compile_hash.update(to_hashable(qnn_config_values))
402+
403+
if device_group is not None:
404+
compile_hash.update(to_hashable({"device_group": device_group}))
405+
406+
compile_hash.update(to_hashable({"num_cores": num_cores}))
407+
compile_hash.update(to_hashable({"mxfp6": mxfp6}))
408+
compile_hash.update(to_hashable({"mxint8": mxint8}))
409+
410+
# Check if already compiled
411+
compile_hash = compile_hash.hexdigest()[:16]
412+
413+
qnn_binary_dir = qpc_base_path / "qpc"
414+
qnn_binary_dir = qnn_binary_dir.with_name(qnn_binary_dir.name + "-" + compile_hash)
415+
if qnn_binary_dir.is_dir():
416+
if (qnn_binary_dir / "programqpc.bin").is_file():
417+
return qnn_binary_dir
418+
# Probably compilation failure last time, delete directory to start over
419+
shutil.rmtree(qnn_binary_dir)
420+
421+
# Write specializations.json file
422+
if specializations is not None:
423+
specializations_json = qpc_base_path / "specializations.json"
424+
with open(specializations_json, "w") as fp:
425+
json.dump(
426+
{"specializations": [{k: str(v) for k, v in spec.items()} for spec in specializations]},
427+
fp,
428+
indent=4,
429+
)
430+
387431
qnn_obj = QNN(
388432
onnx_path=onnx_path,
389433
qpc_base_path=qpc_base_path,

0 commit comments

Comments
 (0)