diff --git a/google/cloud/aiplatform/base.py b/google/cloud/aiplatform/base.py index 9df1d89f38..a04237ef11 100644 --- a/google/cloud/aiplatform/base.py +++ b/google/cloud/aiplatform/base.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2022 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -69,6 +69,10 @@ def __init__(self, name: str): self._logger = logging.getLogger(name) self._logger.setLevel(logging.INFO) + if self._logger.handlers: + # Avoid writing duplicate logs if the logger is created twice. + return + handler = logging.StreamHandler(sys.stdout) handler.setLevel(logging.INFO) diff --git a/google/cloud/aiplatform/compat/__init__.py b/google/cloud/aiplatform/compat/__init__.py index 6b21fd94bf..e97c578999 100644 --- a/google/cloud/aiplatform/compat/__init__.py +++ b/google/cloud/aiplatform/compat/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2022 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -26,6 +26,9 @@ if DEFAULT_VERSION == V1BETA1: services.dataset_service_client = services.dataset_service_client_v1beta1 + services.deployment_resource_pool_service_client = ( + services.deployment_resource_pool_service_client_v1beta1 + ) services.endpoint_service_client = services.endpoint_service_client_v1beta1 services.featurestore_online_serving_service_client = ( services.featurestore_online_serving_service_client_v1beta1 @@ -59,6 +62,7 @@ types.dataset = types.dataset_v1beta1 types.dataset_service = types.dataset_service_v1beta1 types.deployed_model_ref = types.deployed_model_ref_v1beta1 + types.deployment_resource_pool = types.deployment_resource_pool_v1beta1 types.encryption_spec = types.encryption_spec_v1beta1 types.endpoint = types.endpoint_v1beta1 types.endpoint_service = types.endpoint_service_v1beta1 diff --git a/google/cloud/aiplatform/compat/services/__init__.py b/google/cloud/aiplatform/compat/services/__init__.py index 627c77b258..25ff66b1d1 100644 --- a/google/cloud/aiplatform/compat/services/__init__.py +++ b/google/cloud/aiplatform/compat/services/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2021 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -18,6 +18,9 @@ from google.cloud.aiplatform_v1beta1.services.dataset_service import ( client as dataset_service_client_v1beta1, ) +from google.cloud.aiplatform_v1beta1.services.deployment_resource_pool_service import ( + client as deployment_resource_pool_service_client_v1beta1, +) from google.cloud.aiplatform_v1beta1.services.endpoint_service import ( client as endpoint_service_client_v1beta1, ) @@ -119,6 +122,7 @@ vizier_service_client_v1, # v1beta1 dataset_service_client_v1beta1, + deployment_resource_pool_service_client_v1beta1, endpoint_service_client_v1beta1, featurestore_online_serving_service_client_v1beta1, featurestore_service_client_v1beta1, diff --git a/google/cloud/aiplatform/compat/types/__init__.py b/google/cloud/aiplatform/compat/types/__init__.py index ceb9d84157..0356bac918 100644 --- a/google/cloud/aiplatform/compat/types/__init__.py +++ b/google/cloud/aiplatform/compat/types/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2021 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -30,6 +30,8 @@ dataset_service as dataset_service_v1beta1, deployed_index_ref as matching_engine_deployed_index_ref_v1beta1, deployed_model_ref as deployed_model_ref_v1beta1, + deployment_resource_pool as deployment_resource_pool_v1beta1, + deployment_resource_pool_service as deployment_resource_pool_service_v1beta1, encryption_spec as encryption_spec_v1beta1, endpoint as endpoint_v1beta1, endpoint_service as endpoint_service_v1beta1, @@ -229,6 +231,8 @@ data_labeling_job_v1beta1, dataset_v1beta1, dataset_service_v1beta1, + deployment_resource_pool_v1beta1, + deployment_resource_pool_service_v1beta1, deployed_model_ref_v1beta1, encryption_spec_v1beta1, endpoint_v1beta1, diff --git a/google/cloud/aiplatform/preview/models.py b/google/cloud/aiplatform/preview/models.py new file mode 100644 index 0000000000..16517e7d11 --- /dev/null +++ b/google/cloud/aiplatform/preview/models.py @@ -0,0 +1,445 @@ +# -*- coding: utf-8 -*- + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from __future__ import annotations +import itertools +from typing import ( + Optional, + Sequence, +) + +from google.auth import credentials as auth_credentials +import proto + +from google.cloud.aiplatform import base +from google.cloud.aiplatform import initializer +from google.cloud.aiplatform import models +from google.cloud.aiplatform import utils +from google.cloud.aiplatform.compat.services import ( + deployment_resource_pool_service_client_v1beta1, +) +from google.cloud.aiplatform.compat.types import ( + deployed_model_ref_v1beta1 as gca_deployed_model_ref_compat, + deployment_resource_pool_v1beta1 as gca_deployment_resource_pool_compat, + machine_resources_v1beta1 as gca_machine_resources_compat, +) + +_DEFAULT_MACHINE_TYPE = "n1-standard-2" + +_LOGGER = base.Logger(__name__) + + +class DeploymentResourcePool(base.VertexAiResourceNounWithFutureManager): + client_class = utils.DeploymentResourcePoolClientWithOverride + _resource_noun = "deploymentResourcePools" + _getter_method = "get_deployment_resource_pool" + _list_method = "list_deployment_resource_pools" + _delete_method = "delete_deployment_resource_pool" + _parse_resource_name_method = "parse_deployment_resource_pool_path" + _format_resource_name_method = "deployment_resource_pool_path" + + def __init__( + self, + deployment_resource_pool_name: str, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + ): + """Retrieves a DeploymentResourcePool. + + Args: + deployment_resource_pool_name (str): + Required. The fully-qualified resource name or ID of the + deployment resource pool. Example: + "projects/123/locations/us-central1/deploymentResourcePools/456" + or "456" when project and location are initialized or passed. + project (str): + Optional. Project containing the deployment resource pool to + retrieve. If not set, the project given to `aiplatform.init` + will be used. + location (str): + Optional. 
Location containing the deployment resource pool to + retrieve. If not set, the location given to `aiplatform.init` + will be used. + credentials: Optional[auth_credentials.Credentials]=None, + Custom credentials to use to retrieve the deployment resource + pool. If not set, the credentials given to `aiplatform.init` + will be used. + """ + + super().__init__( + project=project, + location=location, + credentials=credentials, + resource_name=deployment_resource_pool_name, + ) + + deployment_resource_pool_name = utils.full_resource_name( + resource_name=deployment_resource_pool_name, + resource_noun=self._resource_noun, + parse_resource_name_method=self._parse_resource_name, + format_resource_name_method=self._format_resource_name, + project=project, + location=location, + ) + + self._gca_resource = self._get_gca_resource( + resource_name=deployment_resource_pool_name + ) + + @classmethod + def create( + cls, + deployment_resource_pool_id: str, + project: Optional[str] = None, + location: Optional[str] = None, + metadata: Sequence[tuple[str, str]] = (), + credentials: Optional[auth_credentials.Credentials] = None, + machine_type: Optional[str] = None, + min_replica_count: int = 1, + max_replica_count: int = 1, + accelerator_type: Optional[str] = None, + accelerator_count: Optional[int] = None, + autoscaling_target_cpu_utilization: Optional[int] = None, + autoscaling_target_accelerator_duty_cycle: Optional[int] = None, + sync=True, + create_request_timeout: Optional[float] = None, + ) -> "DeploymentResourcePool": + """Creates a new DeploymentResourcePool. + + Args: + deployment_resource_pool_id (str): + Required. User-specified name for the new deployment resource + pool. + project (str): + Optional. Project containing the deployment resource pool to + retrieve. If not set, the project given to `aiplatform.init` + will be used. + location (str): + Optional. Location containing the deployment resource pool to + retrieve. 
If not set, the location given to `aiplatform.init` + will be used. + metadata (Sequence[Tuple[str, str]]): + Optional. Strings which should be sent along with the request as + metadata. + credentials: Optional[auth_credentials.Credentials]=None, + Optional. Custom credentials to use to retrieve the deployment + resource pool. If not set, the credentials given to + `aiplatform.init` will be used. + machine_type (str): + Optional. Machine type to use for the deployment resource pool. + If not set, the default machine type of `n1-standard-2` is + used. + min_replica_count (int): + Optional. The minimum replica count of the new deployment + resource pool. Each replica serves a copy of each model deployed + on the deployment resource pool. If this value is less than + `max_replica_count`, then autoscaling is enabled, and the actual + number of replicas will be adjusted to bring resource usage in + line with the autoscaling targets. + max_replica_count (int): + Optional. The maximum replica count of the new deployment + resource pool. + accelerator_type (str): + Optional. Hardware accelerator type. Must also set accelerator_ + count if used. One of NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, + NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4, or + NVIDIA_TESLA_A100. + accelerator_count (int): + Optional. The number of accelerators attached to each replica. + autoscaling_target_cpu_utilization (int): + Optional. Target CPU utilization value for autoscaling. A + default value of 60 will be used if not specified. + autoscaling_target_accelerator_duty_cycle (int): + Optional. Target accelerator duty cycle percentage to use for + autoscaling. Must also set accelerator_type and accelerator + count if specified. A default value of 60 will be used if + accelerators are requested and this is not specified. + sync (bool): + Optional. Whether to execute this method synchronously. 
If + False, this method will be executed in a concurrent Future and + any downstream object will be immediately returned and synced + when the Future has completed. + create_request_timeout (float): + Optional. The create request timeout in seconds. + + Returns: + DeploymentResourcePool + """ + + api_client = cls._instantiate_client(location=location, credentials=credentials) + + project = project or initializer.global_config.project + location = location or initializer.global_config.location + + return cls._create( + api_client=api_client, + deployment_resource_pool_id=deployment_resource_pool_id, + project=project, + location=location, + metadata=metadata, + credentials=credentials, + machine_type=machine_type, + min_replica_count=min_replica_count, + max_replica_count=max_replica_count, + accelerator_type=accelerator_type, + accelerator_count=accelerator_count, + autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, + autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle, + sync=sync, + create_request_timeout=create_request_timeout, + ) + + @classmethod + @base.optional_sync() + def _create( + cls, + api_client: deployment_resource_pool_service_client_v1beta1.DeploymentResourcePoolServiceClient, + deployment_resource_pool_id: str, + project: Optional[str] = None, + location: Optional[str] = None, + metadata: Sequence[tuple[str, str]] = (), + credentials: Optional[auth_credentials.Credentials] = None, + machine_type: Optional[str] = None, + min_replica_count: int = 1, + max_replica_count: int = 1, + accelerator_type: Optional[str] = None, + accelerator_count: Optional[int] = None, + autoscaling_target_cpu_utilization: Optional[int] = None, + autoscaling_target_accelerator_duty_cycle: Optional[int] = None, + sync=True, + create_request_timeout: Optional[float] = None, + ) -> "DeploymentResourcePool": + """Creates a new DeploymentResourcePool. + + Args: + api_client (DeploymentResourcePoolServiceClient): + Required. 
DeploymentResourcePoolServiceClient used to make the + underlying CreateDeploymentResourcePool API call. + deployment_resource_pool_id (str): + Required. User-specified name for the new deployment resource + pool. + project (str): + Optional. Project containing the deployment resource pool to + retrieve. If not set, the project given to `aiplatform.init` + will be used. + location (str): + Optional. Location containing the deployment resource pool to + retrieve. If not set, the location given to `aiplatform.init` + will be used. + metadata (Sequence[Tuple[str, str]]): + Optional. Strings which should be sent along with the request as + metadata. + credentials: Optional[auth_credentials.Credentials]=None, + Optional. Custom credentials to use to retrieve the deployment + resource pool. If not set, the credentials given to + `aiplatform.init` will be used. + machine_type (str): + Optional. Machine type to use for the deployment resource pool. + If not set, the default machine type of `n1-standard-2` is + used. + min_replica_count (int): + Optional. The minimum replica count of the new deployment + resource pool. Each replica serves a copy of each model deployed + on the deployment resource pool. If this value is less than + `max_replica_count`, then autoscaling is enabled, and the actual + number of replicas will be adjusted to bring resource usage in + line with the autoscaling targets. + max_replica_count (int): + Optional. The maximum replica count of the new deployment + resource pool. + accelerator_type (str): + Optional. Hardware accelerator type. Must also set accelerator_ + count if used. One of NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, + NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4, or + NVIDIA_TESLA_A100. + accelerator_count (int): + Optional. The number of accelerators attached to each replica. + autoscaling_target_cpu_utilization (int): + Optional. Target CPU utilization value for autoscaling. A + default value of 60 will be used if not specified. 
+ autoscaling_target_accelerator_duty_cycle (int): + Optional. Target accelerator duty cycle percentage to use for + autoscaling. Must also set accelerator_type and accelerator + count if specified. A default value of 60 will be used if + accelerators are requested and this is not specified. + sync (bool): + Optional. Whether to execute this method synchronously. If + False, this method will be executed in a concurrent Future and + any downstream object will be immediately returned and synced + when the Future has completed. + create_request_timeout (float): + Optional. The create request timeout in seconds. + + Returns: + DeploymentResourcePool + """ + + parent = initializer.global_config.common_location_path( + project=project, location=location + ) + + dedicated_resources = gca_machine_resources_compat.DedicatedResources( + min_replica_count=min_replica_count, + max_replica_count=max_replica_count, + ) + + machine_spec = gca_machine_resources_compat.MachineSpec( + machine_type=machine_type + ) + + if autoscaling_target_cpu_utilization: + autoscaling_metric_spec = ( + gca_machine_resources_compat.AutoscalingMetricSpec( + metric_name=( + "aiplatform.googleapis.com/prediction/online/cpu/utilization" + ), + target=autoscaling_target_cpu_utilization, + ) + ) + dedicated_resources.autoscaling_metric_specs.extend( + [autoscaling_metric_spec] + ) + + if accelerator_type and accelerator_count: + utils.validate_accelerator_type(accelerator_type) + machine_spec.accelerator_type = accelerator_type + machine_spec.accelerator_count = accelerator_count + + if autoscaling_target_accelerator_duty_cycle: + autoscaling_metric_spec = gca_machine_resources_compat.AutoscalingMetricSpec( + metric_name="aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle", + target=autoscaling_target_accelerator_duty_cycle, + ) + dedicated_resources.autoscaling_metric_specs.extend( + [autoscaling_metric_spec] + ) + + dedicated_resources.machine_spec = machine_spec + + gapic_drp = 
gca_deployment_resource_pool_compat.DeploymentResourcePool( + dedicated_resources=dedicated_resources + ) + + operation_future = api_client.create_deployment_resource_pool( + parent=parent, + deployment_resource_pool=gapic_drp, + deployment_resource_pool_id=deployment_resource_pool_id, + metadata=metadata, + timeout=create_request_timeout, + ) + + _LOGGER.log_create_with_lro(cls, operation_future) + + created_drp = operation_future.result() + + _LOGGER.log_create_complete(cls, created_drp, "deployment resource pool") + + return cls._construct_sdk_resource_from_gapic( + gapic_resource=created_drp, + project=project, + location=location, + credentials=credentials, + ) + + def query_deployed_models( + self, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + ) -> list[gca_deployed_model_ref_compat.DeployedModelRef]: + """Lists the deployed models using this resource pool. + + Args: + project (str): + Optional. Project to retrieve list from. If not set, project + set in aiplatform.init will be used. + location (str): + Optional. Location to retrieve list from. If not set, location + set in aiplatform.init will be used. + credentials (auth_credentials.Credentials): + Optional. Custom credentials to use to retrieve list. Overrides + credentials set in aiplatform.init. + + Returns: + List of DeployedModelRef objects containing the endpoint ID and + deployed model ID of the deployed models using this resource pool. 
+ """ + project = project or initializer.global_config.project + location = location or initializer.global_config.location + api_client = DeploymentResourcePool._instantiate_client( + location=location, credentials=credentials + ) + response = api_client.query_deployed_models( + deployment_resource_pool=self.resource_name + ) + return list( + itertools.chain(page.deployed_model_refs for page in response.pages) + ) + + @classmethod + def list( + cls, + filter: Optional[str] = None, + order_by: Optional[str] = None, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + ) -> list["models.DeploymentResourcePool"]: + """Lists the deployment resource pools. + + filter (str): + Optional. An expression for filtering the results of the request. + For field names both snake_case and camelCase are supported. + order_by (str): + Optional. A comma-separated list of fields to order by, sorted in + ascending order. Use "desc" after a field name for descending. + Supported fields: `display_name`, `create_time`, `update_time` + project (str): + Optional. Project to retrieve list from. If not set, project + set in aiplatform.init will be used. + location (str): + Optional. Location to retrieve list from. If not set, location + set in aiplatform.init will be used. + credentials (auth_credentials.Credentials): + Optional. Custom credentials to use to retrieve list. Overrides + credentials set in aiplatform.init. + + Returns: + List of deployment resource pools. 
+ """ + return cls._list( + filter=filter, + order_by=order_by, + project=project, + location=location, + credentials=credentials, + ) + + @classmethod + def _construct_sdk_resource_from_gapic( + cls, + gapic_resource: proto.Message, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + ) -> "models.DeploymentResourcePool": + drp = cls._empty_constructor( + project=project, location=location, credentials=credentials + ) + drp._gca_resource = gapic_resource + return drp diff --git a/google/cloud/aiplatform/utils/__init__.py b/google/cloud/aiplatform/utils/__init__.py index c997d64130..14f2e79ef8 100644 --- a/google/cloud/aiplatform/utils/__init__.py +++ b/google/cloud/aiplatform/utils/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2022 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -37,6 +37,7 @@ from google.cloud.aiplatform.compat.services import ( dataset_service_client_v1beta1, + deployment_resource_pool_service_client_v1beta1, endpoint_service_client_v1beta1, featurestore_online_serving_service_client_v1beta1, featurestore_service_client_v1beta1, @@ -74,6 +75,7 @@ "VertexAiServiceClient", # v1beta1 dataset_service_client_v1beta1.DatasetServiceClient, + deployment_resource_pool_service_client_v1beta1.DeploymentResourcePoolServiceClient, endpoint_service_client_v1beta1.EndpointServiceClient, featurestore_online_serving_service_client_v1beta1.FeaturestoreOnlineServingServiceClient, featurestore_service_client_v1beta1.FeaturestoreServiceClient, @@ -486,6 +488,17 @@ class DatasetClientWithOverride(ClientWithOverride): ) +class DeploymentResourcePoolClientWithOverride(ClientWithOverride): + _is_temporary = True + _default_version = compat.V1BETA1 + _version_map = ( + ( + compat.V1BETA1, + deployment_resource_pool_service_client_v1beta1.DeploymentResourcePoolServiceClient, + ), + ) + + class EndpointClientWithOverride(ClientWithOverride): _is_temporary = True _default_version = compat.DEFAULT_VERSION diff --git a/tests/unit/aiplatform/test_deployment_resource_pools.py b/tests/unit/aiplatform/test_deployment_resource_pools.py new file mode 100644 index 0000000000..f0c319f52f --- /dev/null +++ b/tests/unit/aiplatform/test_deployment_resource_pools.py @@ -0,0 +1,350 @@ +# -*- coding: utf-8 -*- + +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Unit tests for the preview DeploymentResourcePool SDK resource."""

from importlib import reload
from unittest import mock

import pytest

from google.api_core import operation as ga_operation
from google.auth import credentials as auth_credentials

from google.cloud import aiplatform
from google.cloud.aiplatform import base
from google.cloud.aiplatform import initializer
from google.cloud.aiplatform.preview import models

from google.cloud.aiplatform.compat.services import (
    deployment_resource_pool_service_client_v1beta1 as deployment_resource_pool_service_client,
)

from google.cloud.aiplatform.compat.types import (
    deployed_model_ref_v1beta1 as gca_deployed_model_ref,
    deployment_resource_pool_v1beta1 as gca_deployment_resource_pool,
    deployment_resource_pool_service_v1beta1 as gca_deployment_resource_pool_service,
    endpoint as gca_endpoint,
    machine_resources_v1beta1 as gca_machine_resources,
)


_TEST_PROJECT = "test-project"
_TEST_PROJECT_2 = "test-project-2"
_TEST_LOCATION = "us-central1"
_TEST_LOCATION_2 = "europe-west4"

_TEST_DISPLAY_NAME = "test-display-name"
_TEST_DISPLAY_NAME_2 = "test-display-name-2"
_TEST_DISPLAY_NAME_3 = "test-display-name-3"
_TEST_ID = "1028944691210842416"
_TEST_ID_2 = "4366591682456584192"
_TEST_ID_3 = "5820582938582924817"
_TEST_DESCRIPTION = "test-description"
_TEST_REQUEST_METADATA = ()
_TEST_TIMEOUT = None

_TEST_ENDPOINT_NAME = (
    f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}/endpoints/{_TEST_ID}"
)
_TEST_ENDPOINT_NAME_ALT_LOCATION = (
    f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION_2}/endpoints/{_TEST_ID}"
)
_TEST_PARENT = f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}"
_TEST_MODEL_NAME = (
    f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}/models/{_TEST_ID}"
)

_TEST_DRP_NAME = f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}/deploymentResourcePools/{_TEST_ID}"
_TEST_VERSION_ID = "1"

_TEST_NETWORK = f"projects/{_TEST_PROJECT}/global/networks/{_TEST_ID}"

_TEST_MODEL_ID = "1028944691210842416"
_TEST_PREDICTION = [[1.0, 2.0, 3.0], [3.0, 3.0, 1.0]]
_TEST_INSTANCES = [[1.0, 2.0, 3.0], [1.0, 3.0, 4.0]]
_TEST_CREDENTIALS = mock.Mock(spec=auth_credentials.AnonymousCredentials())
_TEST_SERVICE_ACCOUNT = "vinnys@my-project.iam.gserviceaccount.com"

_TEST_DEPLOYED_MODELS = [
    gca_endpoint.DeployedModel(id=_TEST_ID, display_name=_TEST_DISPLAY_NAME),
    gca_endpoint.DeployedModel(id=_TEST_ID_2, display_name=_TEST_DISPLAY_NAME_2),
    gca_endpoint.DeployedModel(id=_TEST_ID_3, display_name=_TEST_DISPLAY_NAME_3),
]

_TEST_LONG_TRAFFIC_SPLIT_SORTED_IDS = ["m4", "m2", "m5", "m3", "m1"]
_TEST_LONG_DEPLOYED_MODELS = [
    gca_endpoint.DeployedModel(id=id, display_name=f"{id}_display_name")
    for id in ["m1", "m2", "m3", "m4", "m5", "m6", "m7"]
]

_TEST_MACHINE_TYPE = "n1-standard-32"
_TEST_ACCELERATOR_TYPE = "NVIDIA_TESLA_P100"
_TEST_ACCELERATOR_COUNT = 2

_TEST_METRIC_NAME_CPU_UTILIZATION = (
    "aiplatform.googleapis.com/prediction/online/cpu/utilization"
)
_TEST_METRIC_NAME_GPU_UTILIZATION = (
    "aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle"
)

_TEST_MACHINE_SPEC = gca_machine_resources.MachineSpec(
    machine_type=_TEST_MACHINE_TYPE,
    accelerator_type=_TEST_ACCELERATOR_TYPE,
    accelerator_count=_TEST_ACCELERATOR_COUNT,
)

_TEST_AUTOSCALING_METRIC_SPECS = [
    gca_machine_resources.AutoscalingMetricSpec(
        metric_name=_TEST_METRIC_NAME_CPU_UTILIZATION, target=70
    ),
    gca_machine_resources.AutoscalingMetricSpec(
        metric_name=_TEST_METRIC_NAME_GPU_UTILIZATION, target=70
    ),
]

_TEST_DEDICATED_RESOURCES = gca_machine_resources.DedicatedResources(
    machine_spec=_TEST_MACHINE_SPEC,
    min_replica_count=10,
    max_replica_count=20,
    autoscaling_metric_specs=_TEST_AUTOSCALING_METRIC_SPECS,
)


@pytest.fixture
def get_drp_mock():
    """Patches GetDeploymentResourcePool to return a canned pool."""
    with mock.patch.object(
        deployment_resource_pool_service_client.DeploymentResourcePoolServiceClient,
        "get_deployment_resource_pool",
    ) as get_drp_mock:
        get_drp_mock.return_value = gca_deployment_resource_pool.DeploymentResourcePool(
            name=_TEST_DRP_NAME,
            dedicated_resources=_TEST_DEDICATED_RESOURCES,
        )
        yield get_drp_mock


@pytest.fixture
def create_drp_mock():
    """Patches CreateDeploymentResourcePool with an LRO resolving to a pool."""
    with mock.patch.object(
        deployment_resource_pool_service_client.DeploymentResourcePoolServiceClient,
        "create_deployment_resource_pool",
    ) as create_drp_mock:
        create_drp_lro_mock = mock.Mock(ga_operation.Operation)
        create_drp_lro_mock.result.return_value = (
            gca_deployment_resource_pool.DeploymentResourcePool(
                name=_TEST_DRP_NAME,
                dedicated_resources=_TEST_DEDICATED_RESOURCES,
            )
        )
        create_drp_mock.return_value = create_drp_lro_mock
        yield create_drp_mock


@pytest.fixture
def list_drp_mock():
    """Patches ListDeploymentResourcePools to return one canned pool."""
    with mock.patch.object(
        deployment_resource_pool_service_client.DeploymentResourcePoolServiceClient,
        "list_deployment_resource_pools",
    ) as list_drp_mock:
        list_drp_mock.return_value = [
            gca_deployment_resource_pool.DeploymentResourcePool(
                name=_TEST_DRP_NAME,
                dedicated_resources=_TEST_DEDICATED_RESOURCES,
            )
        ]
        yield list_drp_mock


@pytest.fixture
def delete_drp_mock():
    """Patches DeleteDeploymentResourcePool with an LRO mock."""
    with mock.patch.object(
        deployment_resource_pool_service_client.DeploymentResourcePoolServiceClient,
        "delete_deployment_resource_pool",
    ) as delete_drp_mock:
        delete_drp_lro_mock = mock.Mock(ga_operation.Operation)
        delete_drp_lro_mock.result.return_value = (
            gca_deployment_resource_pool_service.DeleteDeploymentResourcePoolRequest()
        )
        delete_drp_mock.return_value = delete_drp_lro_mock
        yield delete_drp_mock


@pytest.fixture
def query_deployed_models_mock():
    """Patches QueryDeployedModels with a two-page pager (one ref per page)."""
    with mock.patch.object(
        deployment_resource_pool_service_client.DeploymentResourcePoolServiceClient,
        "query_deployed_models",
    ) as query_deployed_models_mock:
        pager = mock.Mock()
        pager.pages = [
            gca_deployment_resource_pool_service.QueryDeployedModelsResponse(
                deployed_model_refs=[
                    gca_deployed_model_ref.DeployedModelRef(
                        endpoint=_TEST_ID,
                        deployed_model_id=_TEST_ID_2,
                    )
                ],
                total_deployed_model_count=2,
                total_endpoint_count=1,
            ),
            gca_deployment_resource_pool_service.QueryDeployedModelsResponse(
                deployed_model_refs=[
                    gca_deployed_model_ref.DeployedModelRef(
                        endpoint=_TEST_ID,
                        deployed_model_id=_TEST_ID_3,
                    )
                ],
                total_deployed_model_count=2,
                total_endpoint_count=1,
            ),
        ]
        query_deployed_models_mock.return_value = pager
        yield query_deployed_models_mock


@pytest.mark.usefixtures("google_auth_mock")
class TestDeploymentResourcePool:
    def setup_method(self):
        # Reset global SDK state so tests cannot leak config into each other.
        reload(initializer)
        reload(aiplatform)
        aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)

    def teardown_method(self):
        initializer.global_pool.shutdown(wait=True)

    def test_constructor_gets_drp(self, get_drp_mock):
        aiplatform.init(
            project=_TEST_PROJECT,
            location=_TEST_LOCATION,
            credentials=_TEST_CREDENTIALS,
        )
        models.DeploymentResourcePool(_TEST_DRP_NAME)
        get_drp_mock.assert_called_once_with(
            name=_TEST_DRP_NAME, retry=base._DEFAULT_RETRY
        )

    @pytest.mark.usefixtures("get_drp_mock")
    def test_constructor_with_conflicting_location_fails(self):
        """A full resource name in `_TEST_LOCATION` must conflict with an
        explicit `_TEST_LOCATION_2` argument."""
        with pytest.raises(RuntimeError) as err:
            models.DeploymentResourcePool(_TEST_DRP_NAME, location=_TEST_LOCATION_2)

        assert err.match(
            regexp=r"is provided, but different from the resource location"
        )

    @pytest.mark.parametrize("sync", [True, False])
    def test_create(self, create_drp_mock, sync):
        test_drp = models.DeploymentResourcePool.create(
            deployment_resource_pool_id=_TEST_ID,
            machine_type=_TEST_MACHINE_TYPE,
            min_replica_count=10,
            max_replica_count=20,
            accelerator_type=_TEST_ACCELERATOR_TYPE,
            accelerator_count=_TEST_ACCELERATOR_COUNT,
            autoscaling_target_cpu_utilization=70,
            autoscaling_target_accelerator_duty_cycle=70,
            sync=sync,
        )

        if not sync:
            test_drp.wait()

        expected_drp = gca_deployment_resource_pool.DeploymentResourcePool(
            dedicated_resources=_TEST_DEDICATED_RESOURCES
        )

        create_drp_mock.assert_called_once_with(
            parent=_TEST_PARENT,
            deployment_resource_pool_id=_TEST_ID,
            deployment_resource_pool=expected_drp,
            metadata=(),
            timeout=None,
        )

        # The SDK object should wrap the proto the (mocked) LRO resolved to.
        expected_drp.name = _TEST_DRP_NAME
        assert test_drp._gca_resource == expected_drp

    @pytest.mark.parametrize("sync", [True, False])
    def test_create_with_timeout(self, create_drp_mock, sync):
        test_drp = models.DeploymentResourcePool.create(
            deployment_resource_pool_id=_TEST_ID,
            machine_type=_TEST_MACHINE_TYPE,
            min_replica_count=10,
            max_replica_count=20,
            accelerator_type=_TEST_ACCELERATOR_TYPE,
            accelerator_count=_TEST_ACCELERATOR_COUNT,
            autoscaling_target_cpu_utilization=70,
            autoscaling_target_accelerator_duty_cycle=70,
            sync=sync,
            create_request_timeout=100,
        )

        if not sync:
            test_drp.wait()

        expected_drp = gca_deployment_resource_pool.DeploymentResourcePool(
            dedicated_resources=_TEST_DEDICATED_RESOURCES
        )

        create_drp_mock.assert_called_once_with(
            parent=_TEST_PARENT,
            deployment_resource_pool_id=_TEST_ID,
            deployment_resource_pool=expected_drp,
            metadata=(),
            timeout=100,
        )

        expected_drp.name = _TEST_DRP_NAME
        assert test_drp._gca_resource == expected_drp

    def test_list(self, list_drp_mock):
        drp_list = models.DeploymentResourcePool.list()

        list_drp_mock.assert_called_once()

        for drp in drp_list:
            # isinstance, not `type(...) ==`: subclasses are acceptable.
            assert isinstance(drp, models.DeploymentResourcePool)

    @pytest.mark.parametrize("sync", [True, False])
    def test_delete(self, delete_drp_mock, get_drp_mock, sync):
        test_drp = models.DeploymentResourcePool(
            deployment_resource_pool_name=_TEST_DRP_NAME
        )
        test_drp.delete(sync=sync)

        if not sync:
            test_drp.wait()

        delete_drp_mock.assert_called_once_with(name=test_drp.resource_name)

    def test_query_deployed_models(self, query_deployed_models_mock, get_drp_mock):
        test_drp = models.DeploymentResourcePool(
            deployment_resource_pool_name=_TEST_DRP_NAME
        )
        dm_refs = test_drp.query_deployed_models()

        # Two pages with one DeployedModelRef each -> two refs total.
        assert len(dm_refs) == 2
        query_deployed_models_mock.assert_called_once()