quic · quic-dhirajku · Jun 24, 2025 · Jun 27, 2025 · Jun 27, 2025 · quic-amitraj
@@ -5,7 +5,7 @@
 #
 # ----------------------------------------------------------------------------
 
-import hashlib
+import copy
 import inspect
 import json
 import logging
@@ -23,8 +23,8 @@
 from QEfficient.base.pytorch_transforms import PytorchTransform
 from QEfficient.compile.qnn_compiler import compile as qnn_compile
 from QEfficient.generation.cloud_infer import QAICInferenceSession
-from QEfficient.utils import constants, dump_qconfig
-from QEfficient.utils.cache import QEFF_HOME, to_hashable
+from QEfficient.utils import constants, dump_qconfig, make_serializable
+from QEfficient.utils.cache import QEFF_HOME, hash_dict_params
 
 logger = logging.getLogger(__name__)
 
@@ -46,12 +46,22 @@ class QEFFBaseModel(ABC):
     def _transform_names(cls) -> List[str]:
         return [x.__name__ for x in cls._pytorch_transforms + cls._onnx_transforms]
 
-    def __init__(self, model: torch.nn.Module) -> None:
+    def __init__(self, model: torch.nn.Module, **kwargs) -> None:
         super().__init__()
         self.model = model
+
+        # Store the model parameters that are used to calculate the hash for caching
+        self.model_params = {}
+        self.model_params = copy.deepcopy(kwargs)
+        self.model_params["config"] = self.model.config.to_diff_dict()
+        self.model_params["_transform_names"] = self._transform_names()
+
+        if hasattr(self.model.config, "architectures"):
+            self.model_architecture = self.model.config.architectures[0]
         self.onnx_path: Optional[str] = None
         self.qpc_path: Optional[str] = None
         self.qpc_session: Optional[QAICInferenceSession] = None
+        self.pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None)
 
         # Apply the transformations
         any_transformed = False
@@ -68,10 +78,6 @@ def __init__(self, model: torch.nn.Module) -> None:
     @abstractmethod
     def model_name(self) -> str: ...
 
-    @property
-    @abstractmethod
-    def model_hash(self) -> str: ...
-
     @abstractmethod
     def export(self, export_dir: Optional[str] = None) -> Path:
         """
@@ -135,8 +141,20 @@ def _export(
             :onnx_transform_kwargs (dict): Additional arguments to be passed to `Transform.apply` for this class.
             :export_dir (str): Specify the export directory. The export_dir will be suffixed with a hash corresponding to current model.
         """
-        export_dir = Path(export_dir or (QEFF_HOME / self.model_name))
-        export_dir = export_dir.with_name(export_dir.name + "-" + self.model_hash)
+        export_params = {}
+        export_params["output_names"] = output_names
+        export_params["dynamic_axes"] = dynamic_axes
+
+        self.model_params["export_params"] = export_params
+
+        self.model_params.update(export_kwargs) if export_kwargs is not None else None
+        self.model_params.update(onnx_transform_kwargs) if export_kwargs is not None else None
+
+        export_dir = Path(export_dir or (QEFF_HOME / self.model_architecture / self.model_name))
+
+        export_hash = hash_dict_params(self.model_params)
+        export_hash = export_hash.hexdigest()[:16]
+        export_dir = export_dir.with_name(export_dir.name + "-" + export_hash)
         onnx_path = export_dir / f"{self.model_name}.onnx"
         if onnx_path.is_file():
             self.onnx_path = onnx_path
@@ -203,6 +221,20 @@ def _export(
             onnx.save(model, onnx_path)
             logger.info("Transformed onnx saved")
 
+            # Dump the model parameters to a JSON file after a successful ONNX export
+            model_params_json = export_dir / "model_params.json"
+            with open(model_params_json, "w") as fp:
+                json.dump(
+                    {
+                        "model_params": {
+                            k: make_serializable(self.model_params[k]) for k in sorted(self.model_params.keys())
+                        }
+                    },
+                    fp,
+                    indent=4,
+                )
+            logger.info("Parameters used for export hash dumped in a JSON file successfully")
+
         except Exception as e:
             logger.error(f"ONNX export (or) ONNXTransforms failed: {e}")
 
@@ -241,28 +273,23 @@ def _compile(
             :mdp_ts_num_devices (int): Number of devices to partition to use Multi-Device Partitioning with tensor-slicing.
             :num_speculative_tokens (int, optional): Number of speculative tokens to take as input for Speculative Decoding Target Language Model.
             :enable_qnn (bool): Enables QNN Compilation. ``Defaults to False.``
-            :qnn_config (str): Path of QNN Config parameters file. Any extra parameters for QNN compilation can be passed via this file. ``Defaults to None.``
-            :compiler_options: Pass any compiler option as input.
-                Any flag that is supported by `qaic-exec` can be passed. Params are converted to flags as below:
+            :qnn_config (str): Path of QNN Config parameters file. ``Defaults to None.``
+            :compiler_options: Pass any compiler option as input. Any flag that is supported by `qaic-exec` can be passed. Params are converted to flags as below:
                 - aic_num_cores=16 -> -aic-num-cores=16
                 - convert_to_fp16=True -> -convert-to-fp16
-                For QNN Compilation path, when enable_qnn is set to True, any parameter passed in compiler_options will be ignored.
         """
         if onnx_path is None and self.onnx_path is None:
             self.export()
 
+        self.compile_params = {}
+
         onnx_path = Path(onnx_path or self.onnx_path)
         compile_dir = Path(compile_dir or onnx_path.parent)
         qpc_path = compile_dir / "qpc"
         if not onnx_path.is_file():
             raise FileNotFoundError(f"ONNX file not found at: {onnx_path}")
 
         if enable_qnn:
-            if compiler_options:
-                logger.warning(
-                    f"Extra arguments to QNN compilation are supported only via qnn_config file. Ignoring {compiler_options}"
-                )
-
             self.qpc_path = qnn_compile(
                 onnx_path=onnx_path,
                 qpc_base_path=compile_dir,
@@ -289,22 +316,26 @@ def _compile(
                     command.append(option)
                 continue
             command.append(f"{option}={value}")
-        compile_hash = hashlib.sha256(to_hashable(command))
+
+        self.compile_params["command"] = command
 
         if specializations is not None:
-            compile_hash.update(to_hashable(specializations))
+            self.compile_params.update({"specializations": specializations})
 
         if custom_io is not None:
-            compile_hash.update(to_hashable(custom_io))
+            self.compile_params.update({"custom_io": custom_io})
 
         if num_speculative_tokens:
-            compile_hash.update(to_hashable({"num_speculative_tokens": num_speculative_tokens}))
-        # Hash num_devices too, since default value would always be 1.
-        compile_hash.update(to_hashable(mdp_ts_num_devices))
+            self.compile_params.update({"num_speculative_tokens": num_speculative_tokens})
+
+        if mdp_ts_num_devices is not None:
+            self.compile_params.update({"mdp_ts_num_devices": mdp_ts_num_devices})
 
         # Check if already compiled
+        compile_hash = hash_dict_params(self.compile_params)
         compile_hash = compile_hash.hexdigest()[:16]
         compile_dir = qpc_path.with_name(qpc_path.name + "-" + compile_hash)
+
         qpc_path = compile_dir / "qpc"
         qpc_path.mkdir(parents=True, exist_ok=True)
         if qpc_path.is_dir():
@@ -357,6 +388,19 @@ def _compile(
         logger.info(f"Running compiler: {' '.join(command)}")
         try:
             subprocess.run(command, capture_output=True, check=True)
+
+            # Dump the compile parameters to a JSON file after a successful QPC compilation
+            compile_params_json = compile_dir / "compile_params.json"
+            with open(compile_params_json, "w") as fp:
+                json.dump(
+                    {
+                        "compile_params": {
+                            k: make_serializable(self.compile_params[k]) for k in sorted(self.compile_params.keys())
+                        }
+                    },
+                    fp,
+                    indent=4,
+                )
         except subprocess.CalledProcessError as e:
             raise RuntimeError(
                 "\n".join(