Skip to content

Commit 748286b

Browse files
vertex-sdk-bot and copybara-github
authored and committed
chore: Refactor STZ arg checks for Endpoint.deploy and Model.deploy (preview).
PiperOrigin-RevId: 822218677
1 parent 9d139eb commit 748286b

File tree

3 files changed

+42
-58
lines changed

3 files changed

+42
-58
lines changed

google/cloud/aiplatform/models.py

Lines changed: 0 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -1235,9 +1235,6 @@ def _validate_deploy_args(
12351235
traffic_percentage: Optional[int],
12361236
deployment_resource_pool: Optional[DeploymentResourcePool],
12371237
required_replica_count: Optional[int],
1238-
initial_replica_count: Optional[int] = None,
1239-
min_scaleup_period: Optional[int] = None,
1240-
idle_scaledown_period: Optional[int] = None,
12411238
):
12421239
"""Helper method to validate deploy arguments.
12431240
@@ -1293,17 +1290,6 @@ def _validate_deploy_args(
12931290
set, the model deploy/mutate operation will succeed once
12941291
available_replica_count reaches required_replica_count, and the
12951292
rest of the replicas will be retried.
1296-
initial_replica_count (int):
1297-
Optional. The number of replicas to deploy the model with.
1298-
Only applicable for scale-to-zero deployments where
1299-
min_replica_count is 0.
1300-
min_scaleup_period (int):
1301-
Optional. For scale-to-zero deployments, Minimum duration that
1302-
a deployment will be scaled up before traffic is
1303-
evaluated for potential scale-down.
1304-
idle_scaledown_period (int):
1305-
Optional. For scale-to-zero deployments, duration of no traffic
1306-
before scaling to zero.
13071293
13081294
Raises:
13091295
ValueError: if Min or Max replica is negative. Traffic percentage > 100 or
@@ -1319,7 +1305,6 @@ def _validate_deploy_args(
13191305
and max_replica_count != 1
13201306
or required_replica_count
13211307
and required_replica_count != 0
1322-
or initial_replica_count
13231308
):
13241309
raise ValueError(
13251310
"Ignoring explicitly specified replica counts, "
@@ -1342,44 +1327,6 @@ def _validate_deploy_args(
13421327
raise ValueError("Required replica cannot be negative.")
13431328
if accelerator_type:
13441329
utils.validate_accelerator_type(accelerator_type)
1345-
if min_replica_count != 0:
1346-
if initial_replica_count:
1347-
raise ValueError(
1348-
"Initial replica count cannot be set for non-STZ models."
1349-
)
1350-
if min_scaleup_period:
1351-
raise ValueError(
1352-
"Min scaleup period cannot be set for non-STZ models."
1353-
)
1354-
if idle_scaledown_period:
1355-
raise ValueError(
1356-
"Idle scaledown period cannot be set for non-STZ models."
1357-
)
1358-
if min_replica_count == 0 and initial_replica_count:
1359-
if initial_replica_count < 0:
1360-
raise ValueError("Initial replica count must be at least 0.")
1361-
if initial_replica_count > max_replica_count:
1362-
raise ValueError(
1363-
"Initial replica count cannot be greater than max replica count."
1364-
)
1365-
if min_replica_count == 0 and min_scaleup_period:
1366-
if min_scaleup_period < 300:
1367-
raise ValueError(
1368-
"Min scaleup period cannot be less than 300 (5 minutes)."
1369-
)
1370-
if min_scaleup_period > 28800:
1371-
raise ValueError(
1372-
"Min scaleup period cannot be greater than 28800 (8 hours)."
1373-
)
1374-
if min_replica_count == 0 and idle_scaledown_period:
1375-
if idle_scaledown_period < 300:
1376-
raise ValueError(
1377-
"Idle scaledown period cannot be less than 300 (5 minutes)."
1378-
)
1379-
if idle_scaledown_period > 28800:
1380-
raise ValueError(
1381-
"Idle scaledown period cannot be greater than 28800 (8 hours)."
1382-
)
13831330

13841331
if deployed_model_display_name is not None:
13851332
utils.validate_display_name(deployed_model_display_name)

google/cloud/aiplatform/preview/models.py

Lines changed: 40 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -670,6 +670,46 @@ def _validate_deploy_args(
670670
"Minimum and maximum replica counts must not be specified"
671671
"if not using a shared resource pool."
672672
)
673+
# Validate STZ parameters
674+
if min_replica_count != 0:
675+
if initial_replica_count:
676+
raise ValueError(
677+
"Initial replica count cannot be set for non-STZ models."
678+
)
679+
if min_scaleup_period:
680+
raise ValueError(
681+
"Min scaleup period cannot be set for non-STZ models."
682+
)
683+
if idle_scaledown_period:
684+
raise ValueError(
685+
"Idle scaledown period cannot be set for non-STZ models."
686+
)
687+
if min_replica_count == 0 and initial_replica_count:
688+
if initial_replica_count < 0:
689+
raise ValueError("Initial replica count must be at least 0.")
690+
if initial_replica_count > max_replica_count:
691+
raise ValueError(
692+
"Initial replica count cannot be "
693+
"greater than max replica count."
694+
)
695+
if min_replica_count == 0 and min_scaleup_period:
696+
if min_scaleup_period < 300:
697+
raise ValueError(
698+
"Min scaleup period cannot be less than 300 (5 minutes)."
699+
)
700+
if min_scaleup_period > 28800:
701+
raise ValueError(
702+
"Min scaleup period cannot be greater than 28800 (8 hours)."
703+
)
704+
if min_replica_count == 0 and idle_scaledown_period:
705+
if idle_scaledown_period < 300:
706+
raise ValueError(
707+
"Idle scaledown period cannot be less than 300 (5 minutes)."
708+
)
709+
if idle_scaledown_period > 28800:
710+
raise ValueError(
711+
"Idle scaledown period cannot be greater than 28800 (8 hours)."
712+
)
673713
return aiplatform.Endpoint._validate_deploy_args(
674714
min_replica_count=min_replica_count,
675715
max_replica_count=max_replica_count,
@@ -679,9 +719,6 @@ def _validate_deploy_args(
679719
traffic_percentage=traffic_percentage,
680720
deployment_resource_pool=deployment_resource_pool,
681721
required_replica_count=required_replica_count,
682-
initial_replica_count=initial_replica_count,
683-
min_scaleup_period=min_scaleup_period,
684-
idle_scaledown_period=idle_scaledown_period,
685722
)
686723

687724
if (

tests/unit/aiplatform/test_endpoints.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2460,8 +2460,8 @@ def test_deploy_disable_container_logging(self, deploy_model_mock, sync):
24602460
)
24612461
@pytest.mark.parametrize("sync", [True, False])
24622462
def test_deploy_endpoint_raise_error_invalid_stz_config(self, sync):
2463-
test_endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
2464-
test_model = models.Model(_TEST_ID)
2463+
test_endpoint = preview_models.Endpoint(_TEST_ENDPOINT_NAME)
2464+
test_model = preview_models.Model(_TEST_ID)
24652465
test_model._gca_resource.supported_deployment_resources_types.append(
24662466
aiplatform.gapic.Model.DeploymentResourcesType.DEDICATED_RESOURCES
24672467
)

0 commit comments

Comments
 (0)