Merge branch 'quic:main' into cpp_inference

quic · Nov 7, 2024 · f66df27 · f66df27
2 parents a41cec7 + 625cb9f
commit f66df27
Show file tree

Hide file tree

Showing 55 changed files with 2,500 additions and 868 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -7,8 +7,8 @@ FROM docker-registry.qualcomm.com/library/ubuntu:20.04
 RUN apt-get update && apt-get install -y \
     git \
     tmux \
-    python3.8 \
-    python3.8-venv \
+    python3.10 \
+    python3.10-venv \
     python3-pip
 
 # pip recognizes this variable
@@ -24,7 +24,7 @@ RUN mkdir -p /app/qefficient-library
 COPY . /app/qefficient-library
 
 # Create Virtual Env for the docker image
-RUN python3.8 -m venv /app/llm_env
+RUN python3.10 -m venv /app/llm_env
 RUN . /app/llm_env/bin/activate
 WORKDIR /app/qefficient-library
 
@@ -33,7 +33,7 @@ WORKDIR /app/qefficient-library
 RUN pip install torch==2.0.0+cpu --extra-index-url https://download.pytorch.org/whl/cpu --no-deps
 RUN pip install datasets==2.17.0 fsspec==2023.10.0 multidict==6.0.5 sentencepiece --no-deps
 
-RUN python3.8 -m pip install .
+RUN python3.10 -m pip install .
 WORKDIR /app/qefficient-library
 
 # Set the environment variable for the model card name and token ID
@@ -45,7 +45,7 @@ ENV TOKEN_ID = ""
 # Print a success message
 CMD ["echo", "qefficient-transformers repository cloned and setup installed inside Docker image."]
 CMD ["echo", "Starting the Model Download and Export to Onnx Stage for QEff."]
-CMD python3.8 -m QEfficient.cloud.export --model-name "$MODEL_NAME"
+CMD python3.10 -m QEfficient.cloud.export --model-name "$MODEL_NAME"
 
 # Example usage:
 # docker build -t qefficient-library .

diff --git a/QEfficient/base/common.py b/QEfficient/base/common.py
@@ -46,9 +46,10 @@ def get_hf_model_type(hf_model_path: str) -> QEFF_MODEL_TYPE:
     """
     Loads model config file and returns the type of the model (i.e. LLMs, SD, quantized etc.) as supported by the library.
     """
-    assert os.path.isdir(
-        hf_model_path
-    ), "Pleae pass local dir path where the model is downloaded; use `QEfficient.utils.login_and_download_hf_lm` for downloading hf model"
+    if not os.path.isdir(hf_model_path):
+        raise FileNotFoundError(
+            "Please pass local dir path where the model is downloaded; use `QEfficient.utils.login_and_download_hf_lm` for downloading hf model"
+        )
     config, kwargs = AutoConfig.from_pretrained(
         hf_model_path,
         return_unused_kwargs=True,
@@ -78,15 +79,11 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, *args, **kwargs) ->
         Downloads HuggingFace model if already doesn't exist locally, returns QEffAutoModel object based on type of model.
         """
         if not os.path.isdir(pretrained_model_name_or_path):
-            # Save model_card_name if passed
-            model_card_name = kwargs.pop("model_card_name", pretrained_model_name_or_path)
-            kwargs.update({"model_card_name": model_card_name})
             pretrained_model_name_or_path = login_and_download_hf_lm(pretrained_model_name_or_path, *args, **kwargs)
         model_type = get_hf_model_type(hf_model_path=pretrained_model_name_or_path)
         qeff_auto_model_class = MODEL_TYPE_TO_QEFF_AUTO_MODEL_MAP[model_type]
-        assert issubclass(
-            qeff_auto_model_class, QEFFBaseModel
-        ), f"Expected class that inherits {QEFFBaseModel}, got {type(qeff_auto_model_class)}"
+        if not issubclass(qeff_auto_model_class, QEFFBaseModel):
+            raise Exception(f"Expected class that inherits {QEFFBaseModel}, got {type(qeff_auto_model_class)}")
 
         return qeff_auto_model_class.from_pretrained(
             pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs