Skip to content

Separated functions to get the ONNX/QPC paths and exposed them via a child class #176

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 32 additions & 26 deletions QEfficient/base/modeling_qeff.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#
# ----------------------------------------------------------------------------

import hashlib
import inspect
import json
import logging
Expand All @@ -23,7 +22,7 @@
from QEfficient.base.pytorch_transforms import PytorchTransform
from QEfficient.generation.cloud_infer import QAICInferenceSession
from QEfficient.utils import constants
from QEfficient.utils.cache import QEFF_HOME, to_hashable
from QEfficient.utils.cache import QEFF_HOME

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -67,9 +66,13 @@ def __init__(self, model: torch.nn.Module) -> None:
@abstractmethod
def model_name(self) -> str: ...

@property
@classmethod
@abstractmethod
def model_hash(cls) -> str: ...

@classmethod
@abstractmethod
def model_hash(self) -> str: ...
def compile_hash(cls) -> str: ...

@abstractmethod
def export(self, export_dir: Optional[str] = None) -> Path:
Expand Down Expand Up @@ -115,6 +118,7 @@ def _export(
example_inputs: Dict[str, torch.Tensor],
output_names: List[str],
dynamic_axes: Dict[str, Dict[int, str]],
model_hash: str,
export_kwargs: Optional[Dict[str, any]] = None,
onnx_transform_kwargs: Optional[Dict[str, any]] = None,
export_dir: Optional[str] = None,
Expand All @@ -130,9 +134,9 @@ def _export(
:onnx_transform_kwargs (dict): Additional arguments to be passed to `Transform.apply` for this class.
:export_dir (str): Specify the export directory. The export_dir will be suffixed with a hash corresponding to current model.
"""
onnx_path = self._get_onnx_path(model_hash, export_dir)
export_dir = Path(export_dir or (QEFF_HOME / self.model_name))
export_dir = export_dir.with_name(export_dir.name + "-" + self.model_hash)
onnx_path = export_dir / f"{self.model_name}.onnx"
export_dir = export_dir.with_name(export_dir.name + "-" + model_hash)
if onnx_path.is_file():
self.onnx_path = onnx_path
return onnx_path
Expand Down Expand Up @@ -193,8 +197,22 @@ def _export(
self.onnx_path = onnx_path
return onnx_path

def _get_onnx_path(self, model_hash: str, export_dir: Optional[str] = None):
export_dir = Path(export_dir or (QEFF_HOME / self.model_name))
export_dir = export_dir.with_name(export_dir.name + "-" + model_hash)
onnx_path = export_dir / f"{self.model_name}.onnx"
return onnx_path

def _get_qpc_path(self, compile_hash: str, onnx_path: Optional[str] = None, compile_dir: Optional[str] = None):
onnx_path = Path(onnx_path or onnx_path)
compile_dir = Path(compile_dir or onnx_path.parent)
qpc_path = compile_dir / "qpc"
qpc_path = qpc_path.with_name(qpc_path.name + "-" + compile_hash)
return qpc_path

def _compile(
self,
compile_hash: str,
onnx_path: Optional[str] = None,
compile_dir: Optional[str] = None,
*,
Expand Down Expand Up @@ -225,6 +243,14 @@ def _compile(
if not onnx_path.is_file():
raise FileNotFoundError(f"ONNX file not found at: {onnx_path}")

qpc_path = qpc_path.with_name(qpc_path.name + "-" + compile_hash)
if qpc_path.is_dir():
if (qpc_path / "programqpc.bin").is_file():
self.qpc_path = qpc_path
return qpc_path
# Probably compilation failure last time, delete directory to start over
shutil.rmtree(qpc_path)

command = constants.COMPILER + [f"-m={onnx_path}"]
for key, value in compiler_options.items():
option = "-" + key.replace("_", "-")
Expand All @@ -233,26 +259,6 @@ def _compile(
command.append(option)
continue
command.append(f"{option}={value}")
compile_hash = hashlib.sha256(to_hashable(command))

if specializations is not None:
compile_hash.update(to_hashable(specializations))

if custom_io is not None:
compile_hash.update(to_hashable(custom_io))

if mdp_ts_num_devices > 1:
compile_hash.update(to_hashable({"mdp_ts_num_devices": mdp_ts_num_devices}))

# Check if already compiled
compile_hash = compile_hash.hexdigest()[:16]
qpc_path = qpc_path.with_name(qpc_path.name + "-" + compile_hash)
if qpc_path.is_dir():
if (qpc_path / "programqpc.bin").is_file():
self.qpc_path = qpc_path
return qpc_path
# Probably compilation failure last time, delete directory to start over
shutil.rmtree(qpc_path)

# Write specializations.json file
if specializations is not None:
Expand Down
143 changes: 134 additions & 9 deletions QEfficient/transformers/models/modeling_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import hashlib
import logging
import os
import warnings
from pathlib import Path
from typing import Any, List, Optional, Union
Expand Down Expand Up @@ -70,8 +71,8 @@ def model_name(self) -> str:
mname = mname[4:]
return mname

@property
def model_hash(self) -> str:
@classmethod
def model_hash(self, model_config) -> str:
# NOTE: model_config.to_diff_dict() has "_name_or_path" attribute which is the model card name or path.
# Using same card name will result in same hash. But, using a relative path for one run and
# absolute path for another run will result in different hash.
Expand All @@ -80,7 +81,7 @@ def model_hash(self) -> str:

# Compute the hash with: model_config, transforms
mhash = hashlib.sha256()
mhash.update(to_hashable(self.model.config.to_diff_dict()))
mhash.update(to_hashable(model_config.to_diff_dict()))
mhash.update(to_hashable(self._transform_names()))
mhash = mhash.hexdigest()[:16]
return mhash
Expand Down Expand Up @@ -159,16 +160,123 @@ def from_pretrained(cls, pretrained_model_name_or_path, continuous_batching: boo
self.continuous_batching = continuous_batching
return self

@property
def model_hash(self) -> str:
@classmethod
def model_hash(cls, model_config, continuous_batching: bool) -> str:
    """Return a 16-character hex digest identifying this model setup.

    The digest folds in the model config diff-dict, the
    continuous-batching flag, and the names of the applied transforms,
    so changing any of them produces a different hash.
    """
    hasher = hashlib.sha256()
    for piece in (
        model_config.to_diff_dict(),
        {"continuous_batching": continuous_batching},
        cls._transform_names(),
    ):
        hasher.update(to_hashable(piece))
    return hasher.hexdigest()[:16]

@classmethod
def get_onnx_path(cls, model_config, continuous_batching: bool = False, export_dir: Optional[str] = None) -> str:
    # Compute the deterministic ONNX artifact path for a configuration
    # without needing a loaded model instance.
    mhash = cls.model_hash(model_config, continuous_batching=continuous_batching)
    # NOTE(review): _get_onnx_path is declared as an instance method
    # (first parameter `self`) in the base class; calling it on `cls`
    # with only keyword arguments leaves `self` unbound and will raise
    # TypeError — confirm it should be a @classmethod/@staticmethod.
    return cls._get_onnx_path(model_hash=mhash, export_dir=export_dir)

@classmethod
def compile_hash(
    cls,
    model_config,
    num_cores: int,
    continuous_batching: bool = False,
    export_dir: Optional[str] = None,
    prefill_seq_len: int = 32,
    ctx_len: int = 128,
    batch_size: int = 1,
    full_batch_size: Optional[int] = None,
    num_devices: int = 1,
    mxfp6_matmul: bool = False,
    mxint8_kv_cache: bool = False,
    **compiler_options,
):
    """Compute the 16-character hex hash identifying a compilation config.

    Folds the ONNX path, the compiler parameters, the prefill/decode
    specializations, the KV-cache custom-IO dtypes, and (when > 1) the
    multi-device partition count into a sha256 digest, so any change in
    these yields a distinct QPC directory.

    :raises TypeError: if ``continuous_batching`` is requested without
        ``full_batch_size``.
    """
    onnx_path = cls.get_onnx_path(model_config, continuous_batching, export_dir=export_dir)

    # Specializations: one prefill graph (seq_len=prefill_seq_len) and one
    # decode graph (seq_len=1).
    # BUG FIX: originally read `cls.continuous_batching`, which is an
    # instance attribute set in from_pretrained and does not exist on the
    # class; use the explicit parameter instead.
    if continuous_batching:
        if full_batch_size is None:
            raise TypeError("missing required argument: 'full_batch_size'")

        specializations = [
            {"full_batch_size": full_batch_size, "batch_size": 1, "seq_len": prefill_seq_len, "ctx_len": ctx_len},
            {"full_batch_size": full_batch_size, "batch_size": full_batch_size, "seq_len": 1, "ctx_len": ctx_len},
        ]
    else:
        specializations = [
            {"batch_size": batch_size, "seq_len": prefill_seq_len, "ctx_len": ctx_len},
            {"batch_size": batch_size, "seq_len": 1, "ctx_len": ctx_len},
        ]

    # Custom IO: every past key/value input and its RetainedState output
    # carries the KV-cache dtype.
    custom_io = {}
    kv_cache_dtype = "mxint8" if mxint8_kv_cache else "float16"
    for suffix in ["", "_RetainedState"]:
        for i in range(model_config.num_hidden_layers):
            for kv in ["key", "value"]:
                custom_io[f"past_{kv}.{i}{suffix}"] = kv_cache_dtype

    compile_hash = hashlib.sha256(
        to_hashable(
            {
                "onnx_path": onnx_path,
                "num_cores": num_cores,
                "prefill_seq_len": prefill_seq_len,
                "ctx_len": ctx_len,
                "batch_size": batch_size,
                "full_batch_size": full_batch_size,
                "mxfp6_matmul": mxfp6_matmul,
                "mxint8_kv_cache": mxint8_kv_cache,
                **compiler_options,
            }
        )
    )

    if specializations is not None:
        compile_hash.update(to_hashable(specializations))

    if custom_io is not None:
        compile_hash.update(to_hashable(custom_io))

    # Multi-device tensor-slicing only affects the hash when actually used.
    if num_devices > 1:
        compile_hash.update(to_hashable({"mdp_ts_num_devices": num_devices}))

    # Truncate to 16 hex chars, matching the directory-suffix convention.
    return compile_hash.hexdigest()[:16]

@classmethod
def get_qpc_path(
    cls,
    model_config,
    num_cores,
    continuous_batching: bool = False,
    prefill_seq_len: int = 32,
    ctx_len: int = 128,
    batch_size: Optional[int] = 1,
    full_batch_size: Optional[int] = None,
    num_devices: int = 1,
    mxfp6_matmul: bool = False,
    mxint8_kv_cache: bool = False,
    onnx_path: Optional[str] = None,
    compile_dir: Optional[str] = None,
    **compiler_options,
):
    """Resolve the QPC directory for the given model/compile configuration.

    Computes the compile hash from the configuration and delegates path
    construction to the base class.
    """
    # BUG FIX: `continuous_batching` was passed positionally, landing in the
    # `num_cores` slot of compile_hash and then colliding with the
    # `num_cores=` keyword (TypeError). Pass both by keyword.
    compile_hash = cls.compile_hash(
        model_config,
        num_cores=num_cores,
        continuous_batching=continuous_batching,
        export_dir=os.path.dirname(onnx_path) if onnx_path else None,
        prefill_seq_len=prefill_seq_len,
        ctx_len=ctx_len,
        batch_size=batch_size,
        full_batch_size=full_batch_size,
        num_devices=num_devices,
        mxfp6_matmul=mxfp6_matmul,
        mxint8_kv_cache=mxint8_kv_cache,
        **compiler_options,
    )
    # NOTE(review): _get_qpc_path is an instance method on the base class;
    # calling it on `cls` binds compile_hash to `self` and shifts the
    # remaining arguments — confirm it should be a @classmethod.
    return cls._get_qpc_path(compile_hash, onnx_path, compile_dir)

def export(self, export_dir: Optional[str] = None) -> str:
"""
Exports the model to ``ONNX`` format using ``torch.onnx.export``.
Expand Down Expand Up @@ -220,11 +328,13 @@ def export(self, export_dir: Optional[str] = None) -> str:
example_inputs,
output_names,
dynamic_axes,
model_hash=self.model_hash(model_config=self.model.config, continuous_batching=self.continuous_batching),
export_dir=export_dir,
)

def compile(
self,
num_cores: int,
onnx_path: Optional[str] = None,
compile_dir: Optional[str] = None,
*,
Expand All @@ -233,7 +343,6 @@ def compile(
batch_size: int = 1,
full_batch_size: Optional[int] = None,
num_devices: int = 1,
num_cores: int = 16, # FIXME: Make this mandatory arg
mxfp6_matmul: bool = False,
mxint8_kv_cache: bool = False,
**compiler_options,
Expand Down Expand Up @@ -283,7 +392,23 @@ def compile(
for kv in ["key", "value"]:
custom_io[f"past_{kv}.{i}{suffix}"] = kv_cache_dtype

compile_hash = self.compile_hash(
self.model.config,
self.continuous_batching,
num_cores=num_cores,
export_dir=os.path.dirname(onnx_path) if onnx_path else None,
prefill_seq_len=prefill_seq_len,
ctx_len=ctx_len,
batch_size=batch_size,
full_batch_size=full_batch_size,
num_devices=num_devices,
mxfp6_matmul=mxfp6_matmul,
mxint8_kv_cache=mxint8_kv_cache,
**compiler_options,
)

return self._compile(
compile_hash,
onnx_path,
compile_dir,
compile_only=True,
Expand Down