Skip to content

Commit c96da69

Browse files
authored
feat(inference): add modelId and quantization to updateDeploymentRequest (#912)
1 parent 98688fc commit c96da69

File tree

6 files changed

+48
-0
lines changed

6 files changed

+48
-0
lines changed

scaleway-async/scaleway_async/inference/v1/api.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -311,6 +311,8 @@ async def update_deployment(
311311
tags: Optional[List[str]] = None,
312312
min_size: Optional[int] = None,
313313
max_size: Optional[int] = None,
314+
model_id: Optional[str] = None,
315+
quantization: Optional[DeploymentQuantization] = None,
314316
) -> Deployment:
315317
"""
316318
Update a deployment.
@@ -321,6 +323,8 @@ async def update_deployment(
321323
:param tags: List of tags to apply to the deployment.
322324
:param min_size: Defines the new minimum size of the pool.
323325
:param max_size: Defines the new maximum size of the pool.
326+
:param model_id: Id of the model to set to the deployment.
327+
:param quantization: Quantization to use to the deployment.
324328
:return: :class:`Deployment <Deployment>`
325329
326330
Usage:
@@ -347,6 +351,8 @@ async def update_deployment(
347351
tags=tags,
348352
min_size=min_size,
349353
max_size=max_size,
354+
model_id=model_id,
355+
quantization=quantization,
350356
),
351357
self.client,
352358
),

scaleway-async/scaleway_async/inference/v1/marshalling.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -639,6 +639,14 @@ def marshal_UpdateDeploymentRequest(
639639
if request.max_size is not None:
640640
output["max_size"] = request.max_size
641641

642+
if request.model_id is not None:
643+
output["model_id"] = request.model_id
644+
645+
if request.quantization is not None:
646+
output["quantization"] = marshal_DeploymentQuantization(
647+
request.quantization, defaults
648+
)
649+
642650
return output
643651

644652

scaleway-async/scaleway_async/inference/v1/types.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -750,6 +750,16 @@ class UpdateDeploymentRequest:
750750
Defines the new maximum size of the pool.
751751
"""
752752

753+
model_id: Optional[str]
754+
"""
755+
Id of the model to set to the deployment.
756+
"""
757+
758+
quantization: Optional[DeploymentQuantization]
759+
"""
760+
Quantization to use to the deployment.
761+
"""
762+
753763

754764
@dataclass
755765
class UpdateEndpointRequest:

scaleway/scaleway/inference/v1/api.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,8 @@ def update_deployment(
309309
tags: Optional[List[str]] = None,
310310
min_size: Optional[int] = None,
311311
max_size: Optional[int] = None,
312+
model_id: Optional[str] = None,
313+
quantization: Optional[DeploymentQuantization] = None,
312314
) -> Deployment:
313315
"""
314316
Update a deployment.
@@ -319,6 +321,8 @@ def update_deployment(
319321
:param tags: List of tags to apply to the deployment.
320322
:param min_size: Defines the new minimum size of the pool.
321323
:param max_size: Defines the new maximum size of the pool.
324+
:param model_id: Id of the model to set to the deployment.
325+
:param quantization: Quantization to use to the deployment.
322326
:return: :class:`Deployment <Deployment>`
323327
324328
Usage:
@@ -345,6 +349,8 @@ def update_deployment(
345349
tags=tags,
346350
min_size=min_size,
347351
max_size=max_size,
352+
model_id=model_id,
353+
quantization=quantization,
348354
),
349355
self.client,
350356
),

scaleway/scaleway/inference/v1/marshalling.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -639,6 +639,14 @@ def marshal_UpdateDeploymentRequest(
639639
if request.max_size is not None:
640640
output["max_size"] = request.max_size
641641

642+
if request.model_id is not None:
643+
output["model_id"] = request.model_id
644+
645+
if request.quantization is not None:
646+
output["quantization"] = marshal_DeploymentQuantization(
647+
request.quantization, defaults
648+
)
649+
642650
return output
643651

644652

scaleway/scaleway/inference/v1/types.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -750,6 +750,16 @@ class UpdateDeploymentRequest:
750750
Defines the new maximum size of the pool.
751751
"""
752752

753+
model_id: Optional[str]
754+
"""
755+
Id of the model to set to the deployment.
756+
"""
757+
758+
quantization: Optional[DeploymentQuantization]
759+
"""
760+
Quantization to use to the deployment.
761+
"""
762+
753763

754764
@dataclass
755765
class UpdateEndpointRequest:

0 commit comments

Comments
 (0)