Skip to content

Separated functions to get the ONNX/QPC paths and exposed them via a child class #176

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 32 additions & 26 deletions QEfficient/base/modeling_qeff.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#
# ----------------------------------------------------------------------------

import hashlib
import inspect
import json
import logging
Expand All @@ -23,7 +22,7 @@
from QEfficient.base.pytorch_transforms import PytorchTransform
from QEfficient.generation.cloud_infer import QAICInferenceSession
from QEfficient.utils import constants
from QEfficient.utils.cache import QEFF_HOME, to_hashable
from QEfficient.utils.cache import QEFF_HOME

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -67,9 +66,13 @@ def __init__(self, model: torch.nn.Module) -> None:
@abstractmethod
def model_name(self) -> str: ...

@property
@classmethod
@abstractmethod
def model_hash(cls) -> str: ...

@classmethod
@abstractmethod
def model_hash(self) -> str: ...
def compile_hash(cls) -> str: ...

@abstractmethod
def export(self, export_dir: Optional[str] = None) -> Path:
Expand Down Expand Up @@ -115,6 +118,7 @@ def _export(
example_inputs: Dict[str, torch.Tensor],
output_names: List[str],
dynamic_axes: Dict[str, Dict[int, str]],
model_hash: str,
export_kwargs: Optional[Dict[str, any]] = None,
onnx_transform_kwargs: Optional[Dict[str, any]] = None,
export_dir: Optional[str] = None,
Expand All @@ -130,9 +134,9 @@ def _export(
:onnx_transform_kwargs (dict): Additional arguments to be passed to `Transform.apply` for this class.
:export_dir (str): Specify the export directory. The export_dir will be suffixed with a hash corresponding to current model.
"""
onnx_path = self._get_onnx_path(model_hash, export_dir)
export_dir = Path(export_dir or (QEFF_HOME / self.model_name))
export_dir = export_dir.with_name(export_dir.name + "-" + self.model_hash)
onnx_path = export_dir / f"{self.model_name}.onnx"
export_dir = export_dir.with_name(export_dir.name + "-" + model_hash)
if onnx_path.is_file():
self.onnx_path = onnx_path
return onnx_path
Expand Down Expand Up @@ -193,8 +197,22 @@ def _export(
self.onnx_path = onnx_path
return onnx_path

def _get_onnx_path(self, model_hash: str, export_dir: Optional[str] = None):
export_dir = Path(export_dir or (QEFF_HOME / self.model_name))
export_dir = export_dir.with_name(export_dir.name + "-" + model_hash)
onnx_path = export_dir / f"{self.model_name}.onnx"
return onnx_path

def _get_qpc_path(self, compile_hash: str, onnx_path: Optional[str] = None, compile_dir: Optional[str] = None):
onnx_path = Path(onnx_path or onnx_path)
compile_dir = Path(compile_dir or onnx_path.parent)
qpc_path = compile_dir / "qpc"
qpc_path = qpc_path.with_name(qpc_path.name + "-" + compile_hash)
return qpc_path

def _compile(
self,
compile_hash: str,
onnx_path: Optional[str] = None,
compile_dir: Optional[str] = None,
*,
Expand Down Expand Up @@ -225,6 +243,14 @@ def _compile(
if not onnx_path.is_file():
raise FileNotFoundError(f"ONNX file not found at: {onnx_path}")

qpc_path = qpc_path.with_name(qpc_path.name + "-" + compile_hash)
if qpc_path.is_dir():
if (qpc_path / "programqpc.bin").is_file():
self.qpc_path = qpc_path
return qpc_path
# Probably compilation failure last time, delete directory to start over
shutil.rmtree(qpc_path)

command = constants.COMPILER + [f"-m={onnx_path}"]
for key, value in compiler_options.items():
option = "-" + key.replace("_", "-")
Expand All @@ -233,26 +259,6 @@ def _compile(
command.append(option)
continue
command.append(f"{option}={value}")
compile_hash = hashlib.sha256(to_hashable(command))

if specializations is not None:
compile_hash.update(to_hashable(specializations))

if custom_io is not None:
compile_hash.update(to_hashable(custom_io))

if mdp_ts_num_devices > 1:
compile_hash.update(to_hashable({"mdp_ts_num_devices": mdp_ts_num_devices}))

# Check if already compiled
compile_hash = compile_hash.hexdigest()[:16]
qpc_path = qpc_path.with_name(qpc_path.name + "-" + compile_hash)
if qpc_path.is_dir():
if (qpc_path / "programqpc.bin").is_file():
self.qpc_path = qpc_path
return qpc_path
# Probably compilation failure last time, delete directory to start over
shutil.rmtree(qpc_path)

# Write specializations.json file
if specializations is not None:
Expand Down
143 changes: 134 additions & 9 deletions QEfficient/transformers/models/modeling_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import hashlib
import logging
import os
import warnings
from pathlib import Path
from typing import Any, List, Optional, Union
Expand Down Expand Up @@ -70,8 +71,8 @@ def model_name(self) -> str:
mname = mname[4:]
return mname

@property
def model_hash(self) -> str:
@classmethod
def model_hash(self, model_config) -> str:
# NOTE: model_config.to_diff_dict() has "_name_or_path" attribute which is the model card name or path.
# Using same card name will result in same hash. But, using a relative path for one run and
# absolute path for another run will result in different hash.
Expand All @@ -80,7 +81,7 @@ def model_hash(self) -> str:

# Compute the hash with: model_config, transforms
mhash = hashlib.sha256()
mhash.update(to_hashable(self.model.config.to_diff_dict()))
mhash.update(to_hashable(model_config.to_diff_dict()))
mhash.update(to_hashable(self._transform_names()))
mhash = mhash.hexdigest()[:16]
return mhash
Expand Down Expand Up @@ -159,16 +160,123 @@ def from_pretrained(cls, pretrained_model_name_or_path, continuous_batching: boo
self.continuous_batching = continuous_batching
return self

@property
def model_hash(self) -> str:
@classmethod
def model_hash(cls, model_config, continuous_batching: bool) -> str:
    """Return a 16-character hex digest identifying this model setup.

    The digest folds in the model config diff-dict, the
    continuous-batching flag, and the names of the applied transforms,
    so changing any of them produces a different hash.
    """
    hasher = hashlib.sha256()
    for piece in (
        model_config.to_diff_dict(),
        {"continuous_batching": continuous_batching},
        cls._transform_names(),
    ):
        hasher.update(to_hashable(piece))
    return hasher.hexdigest()[:16]

@classmethod
def get_onnx_path(cls, model_config, continuous_batching: bool = False, export_dir: Optional[str] = None) -> str:
    # Compute the deterministic ONNX artifact path for a configuration
    # without needing a loaded model instance.
    mhash = cls.model_hash(model_config, continuous_batching=continuous_batching)
    # NOTE(review): _get_onnx_path is declared as an instance method
    # (first parameter `self`) in the base class; calling it on `cls`
    # with only keyword arguments leaves `self` unbound and will raise
    # TypeError — confirm it should be a @classmethod/@staticmethod.
    return cls._get_onnx_path(model_hash=mhash, export_dir=export_dir)

@classmethod
def compile_hash(
    cls,
    model_config,
    num_cores: int,
    continuous_batching: bool = False,
    export_dir: Optional[str] = None,
    prefill_seq_len: int = 32,
    ctx_len: int = 128,
    batch_size: int = 1,
    full_batch_size: Optional[int] = None,
    num_devices: int = 1,
    mxfp6_matmul: bool = False,
    mxint8_kv_cache: bool = False,
    **compiler_options,
):
    """Compute the 16-character hex hash identifying a compilation config.

    Folds the ONNX path, the compiler parameters, the prefill/decode
    specializations, the KV-cache custom-IO dtypes, and (when > 1) the
    multi-device partition count into a sha256 digest, so any change in
    these yields a distinct QPC directory.

    :raises TypeError: if ``continuous_batching`` is requested without
        ``full_batch_size``.
    """
    onnx_path = cls.get_onnx_path(model_config, continuous_batching, export_dir=export_dir)

    # Specializations: one prefill graph (seq_len=prefill_seq_len) and one
    # decode graph (seq_len=1).
    # BUG FIX: originally read `cls.continuous_batching`, which is an
    # instance attribute set in from_pretrained and does not exist on the
    # class; use the explicit parameter instead.
    if continuous_batching:
        if full_batch_size is None:
            raise TypeError("missing required argument: 'full_batch_size'")

        specializations = [
            {"full_batch_size": full_batch_size, "batch_size": 1, "seq_len": prefill_seq_len, "ctx_len": ctx_len},
            {"full_batch_size": full_batch_size, "batch_size": full_batch_size, "seq_len": 1, "ctx_len": ctx_len},
        ]
    else:
        specializations = [
            {"batch_size": batch_size, "seq_len": prefill_seq_len, "ctx_len": ctx_len},
            {"batch_size": batch_size, "seq_len": 1, "ctx_len": ctx_len},
        ]

    # Custom IO: every past key/value input and its RetainedState output
    # carries the KV-cache dtype.
    custom_io = {}
    kv_cache_dtype = "mxint8" if mxint8_kv_cache else "float16"
    for suffix in ["", "_RetainedState"]:
        for i in range(model_config.num_hidden_layers):
            for kv in ["key", "value"]:
                custom_io[f"past_{kv}.{i}{suffix}"] = kv_cache_dtype

    compile_hash = hashlib.sha256(
        to_hashable(
            {
                "onnx_path": onnx_path,
                "num_cores": num_cores,
                "prefill_seq_len": prefill_seq_len,
                "ctx_len": ctx_len,
                "batch_size": batch_size,
                "full_batch_size": full_batch_size,
                "mxfp6_matmul": mxfp6_matmul,
                "mxint8_kv_cache": mxint8_kv_cache,
                **compiler_options,
            }
        )
    )

    if specializations is not None:
        compile_hash.update(to_hashable(specializations))

    if custom_io is not None:
        compile_hash.update(to_hashable(custom_io))

    # Multi-device tensor-slicing only affects the hash when actually used.
    if num_devices > 1:
        compile_hash.update(to_hashable({"mdp_ts_num_devices": num_devices}))

    # Truncate to 16 hex chars, matching the directory-suffix convention.
    return compile_hash.hexdigest()[:16]

@classmethod
def get_qpc_path(
    cls,
    model_config,
    num_cores,
    continuous_batching: bool = False,
    prefill_seq_len: int = 32,
    ctx_len: int = 128,
    batch_size: Optional[int] = 1,
    full_batch_size: Optional[int] = None,
    num_devices: int = 1,
    mxfp6_matmul: bool = False,
    mxint8_kv_cache: bool = False,
    onnx_path: Optional[str] = None,
    compile_dir: Optional[str] = None,
    **compiler_options,
):
    """Resolve the QPC directory for the given model/compile configuration.

    Computes the compile hash from the configuration and delegates path
    construction to the base class.
    """
    # BUG FIX: `continuous_batching` was passed positionally, landing in the
    # `num_cores` slot of compile_hash and then colliding with the
    # `num_cores=` keyword (TypeError). Pass both by keyword.
    compile_hash = cls.compile_hash(
        model_config,
        num_cores=num_cores,
        continuous_batching=continuous_batching,
        export_dir=os.path.dirname(onnx_path) if onnx_path else None,
        prefill_seq_len=prefill_seq_len,
        ctx_len=ctx_len,
        batch_size=batch_size,
        full_batch_size=full_batch_size,
        num_devices=num_devices,
        mxfp6_matmul=mxfp6_matmul,
        mxint8_kv_cache=mxint8_kv_cache,
        **compiler_options,
    )
    # NOTE(review): _get_qpc_path is an instance method on the base class;
    # calling it on `cls` binds compile_hash to `self` and shifts the
    # remaining arguments — confirm it should be a @classmethod.
    return cls._get_qpc_path(compile_hash, onnx_path, compile_dir)

def export(self, export_dir: Optional[str] = None) -> str:
"""
Exports the model to ``ONNX`` format using ``torch.onnx.export``.
Expand Down Expand Up @@ -220,11 +328,13 @@ def export(self, export_dir: Optional[str] = None) -> str:
example_inputs,
output_names,
dynamic_axes,
model_hash=self.model_hash(model_config=self.model.config, continuous_batching=self.continuous_batching),
export_dir=export_dir,
)

def compile(
self,
num_cores: int,
onnx_path: Optional[str] = None,
compile_dir: Optional[str] = None,
*,
Expand All @@ -233,7 +343,6 @@ def compile(
batch_size: int = 1,
full_batch_size: Optional[int] = None,
num_devices: int = 1,
num_cores: int = 16, # FIXME: Make this mandatory arg
mxfp6_matmul: bool = False,
mxint8_kv_cache: bool = False,
**compiler_options,
Expand Down Expand Up @@ -283,7 +392,23 @@ def compile(
for kv in ["key", "value"]:
custom_io[f"past_{kv}.{i}{suffix}"] = kv_cache_dtype

compile_hash = self.compile_hash(
self.model.config,
self.continuous_batching,
num_cores=num_cores,
export_dir=os.path.dirname(onnx_path) if onnx_path else None,
prefill_seq_len=prefill_seq_len,
ctx_len=ctx_len,
batch_size=batch_size,
full_batch_size=full_batch_size,
num_devices=num_devices,
mxfp6_matmul=mxfp6_matmul,
mxint8_kv_cache=mxint8_kv_cache,
**compiler_options,
)

return self._compile(
compile_hash,
onnx_path,
compile_dir,
compile_only=True,
Expand Down