feat(inference): add modelId and quantization to updateDeploymentRequest (#912)

scaleway-bot · web-flow · commit c96da693670e · 2025-03-18T10:28:39.000Z
diff --git a/scaleway-async/scaleway_async/inference/v1/api.py b/scaleway-async/scaleway_async/inference/v1/api.py
@@ -311,6 +311,8 @@ async def update_deployment(
         tags: Optional[List[str]] = None,
         min_size: Optional[int] = None,
         max_size: Optional[int] = None,
+        model_id: Optional[str] = None,
+        quantization: Optional[DeploymentQuantization] = None,
     ) -> Deployment:
         """
         Update a deployment.
@@ -321,6 +323,8 @@ async def update_deployment(
         :param tags: List of tags to apply to the deployment.
         :param min_size: Defines the new minimum size of the pool.
         :param max_size: Defines the new maximum size of the pool.
+        :param model_id: ID of the model to set on the deployment.
+        :param quantization: Quantization to use for the deployment.
         :return: :class:`Deployment <Deployment>`
 
         Usage:
@@ -347,6 +351,8 @@ async def update_deployment(
                     tags=tags,
                     min_size=min_size,
                     max_size=max_size,
+                    model_id=model_id,
+                    quantization=quantization,
                 ),
                 self.client,
             ),
diff --git a/scaleway-async/scaleway_async/inference/v1/marshalling.py b/scaleway-async/scaleway_async/inference/v1/marshalling.py
@@ -639,6 +639,14 @@ def marshal_UpdateDeploymentRequest(
     if request.max_size is not None:
         output["max_size"] = request.max_size
 
+    if request.model_id is not None:
+        output["model_id"] = request.model_id
+
+    if request.quantization is not None:
+        output["quantization"] = marshal_DeploymentQuantization(
+            request.quantization, defaults
+        )
+
     return output
 
 
diff --git a/scaleway-async/scaleway_async/inference/v1/types.py b/scaleway-async/scaleway_async/inference/v1/types.py
@@ -750,6 +750,16 @@ class UpdateDeploymentRequest:
     Defines the new maximum size of the pool.
     """
 
+    model_id: Optional[str]
+    """
+    ID of the model to set on the deployment.
+    """
+
+    quantization: Optional[DeploymentQuantization]
+    """
+    Quantization to use for the deployment.
+    """
+
 
 @dataclass
 class UpdateEndpointRequest:
diff --git a/scaleway/scaleway/inference/v1/api.py b/scaleway/scaleway/inference/v1/api.py
@@ -309,6 +309,8 @@ def update_deployment(
         tags: Optional[List[str]] = None,
         min_size: Optional[int] = None,
         max_size: Optional[int] = None,
+        model_id: Optional[str] = None,
+        quantization: Optional[DeploymentQuantization] = None,
     ) -> Deployment:
         """
         Update a deployment.
@@ -319,6 +321,8 @@ def update_deployment(
         :param tags: List of tags to apply to the deployment.
         :param min_size: Defines the new minimum size of the pool.
         :param max_size: Defines the new maximum size of the pool.
+        :param model_id: ID of the model to set on the deployment.
+        :param quantization: Quantization to use for the deployment.
         :return: :class:`Deployment <Deployment>`
 
         Usage:
@@ -345,6 +349,8 @@ def update_deployment(
                     tags=tags,
                     min_size=min_size,
                     max_size=max_size,
+                    model_id=model_id,
+                    quantization=quantization,
                 ),
                 self.client,
             ),
diff --git a/scaleway/scaleway/inference/v1/marshalling.py b/scaleway/scaleway/inference/v1/marshalling.py
@@ -639,6 +639,14 @@ def marshal_UpdateDeploymentRequest(
     if request.max_size is not None:
         output["max_size"] = request.max_size
 
+    if request.model_id is not None:
+        output["model_id"] = request.model_id
+
+    if request.quantization is not None:
+        output["quantization"] = marshal_DeploymentQuantization(
+            request.quantization, defaults
+        )
+
     return output
 
 
diff --git a/scaleway/scaleway/inference/v1/types.py b/scaleway/scaleway/inference/v1/types.py
@@ -750,6 +750,16 @@ class UpdateDeploymentRequest:
     Defines the new maximum size of the pool.
     """
 
+    model_id: Optional[str]
+    """
+    ID of the model to set on the deployment.
+    """
+
+    quantization: Optional[DeploymentQuantization]
+    """
+    Quantization to use for the deployment.
+    """
+
 
 @dataclass
 class UpdateEndpointRequest: