Skip to content

Commit

Permalink
[GCP] Add gcp.force_enable_external_ips configuration (#3699)
Browse files Browse the repository at this point in the history
* Add boolean gcp.force_enable_external_ips to config schema

* Propagate config to cluster yaml

* I think this is right?

* Wire config to resource vars for template

* Configure subnet based on provider config

* Try to placate the yapf overlords

* Fix typo

* Add docs to gcp cloud permissions

* Add docs to config reference

* Appease CI

Co-authored-by: Zhanghao Wu <zhanghao.wu@outlook.com>

* Add smoke test

* Fix typo. Thanks test!

* I specified vcp not subnet name

* Add use internal ips test, also yapf

* Try to launch detached, doesn't work, need ctrl-c

* Test passes, cluster teardown now fails

* Remove force use interal ips test

* format.sh

* Specify `--cloud gcp --cpus 2` in smoke test cluster

Co-authored-by: Zhanghao Wu <zhanghao.wu@outlook.com>

---------

Co-authored-by: Zhanghao Wu <zhanghao.wu@outlook.com>
  • Loading branch information
Ultramann and Michaelvll authored Jul 9, 2024
1 parent 4507bdc commit 38b101d
Show file tree
Hide file tree
Showing 9 changed files with 76 additions and 11 deletions.
19 changes: 16 additions & 3 deletions docs/source/cloud-setup/cloud-permissions/gcp.rst
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ User
resourcemanager.projects.getIamPolicy
.. note::

For custom VPC users (with :code:`gcp.vpc_name` specified in :code:`~/.sky/config.yaml`, check `here <#_gcp-bring-your-vpc>`_), :code:`compute.firewalls.create` and :code:`compute.firewalls.delete` are not necessary unless opening ports is needed via `resources.ports` in task yaml.

.. note::
Expand Down Expand Up @@ -145,7 +145,7 @@ User
8. **Optional**: If the user needs to use custom machine images with ``sky launch --image-id``, you can additionally add the following permissions:

.. code-block:: text
compute.disks.get
compute.disks.resize
compute.images.get
Expand Down Expand Up @@ -297,7 +297,7 @@ To do so, you can use SkyPilot's global config file ``~/.sky/config.yaml`` to sp
use_internal_ips: true
# VPC with NAT setup, see below
vpc_name: my-vpc-name
ssh_proxy_command: ssh -W %h:%p -o StrictHostKeyChecking=no myself@my.proxy
ssh_proxy_command: ssh -W %h:%p -o StrictHostKeyChecking=no myself@my.proxy
The ``gcp.ssh_proxy_command`` field is optional. If SkyPilot is run on a machine that can directly access the internal IPs of the instances, it can be omitted. Otherwise, it should be set to a command that can be used to proxy SSH connections to the internal IPs of the instances.

Expand Down Expand Up @@ -338,3 +338,16 @@ If proxy is not needed, but the regions need to be limited, you can set the ``gc
ssh_proxy_command:
us-west1: null
us-east1: null
Force Enable Exteral IPs
~~~~~~~~~~~~~~~~~~~~~~~~

An alternative to setting up cloud NAT for instances that need to access the public internet but are in a VPC and communicated with via their internal IP is to force them to be created with an external IP address.

.. code-block:: yaml
gcp:
use_internal_ips: true
vpc_name: my-vpc-name
force_enable_external_ips: true
12 changes: 11 additions & 1 deletion docs/source/reference/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,16 @@ Available fields and semantics:
#
# Default: false.
use_internal_ips: true
# Should instances in a vpc where communicated with via internal IPs still
# have an external IP? (optional)
#
# Set to true to force VMs to be assigned an exteral IP even when vpc_name
# and use_internal_ips are set.
#
# Default: false
force_enable_external_ips: true
# SSH proxy command (optional).
#
# Please refer to the aws.ssh_proxy_command section above for more details.
Expand Down Expand Up @@ -312,7 +322,7 @@ Available fields and semantics:
#
# Default: 900
provision_timeout: 900
# Identity to use for all GCP instances (optional).
#
Expand Down
3 changes: 3 additions & 0 deletions sky/clouds/gcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,9 @@ def make_deploy_resources_variables(
int(use_mig))
if use_mig:
resources_vars.update(managed_instance_group_config)
resources_vars[
'force_enable_external_ips'] = skypilot_config.get_nested(
('gcp', 'force_enable_external_ips'), False)
return resources_vars

def _get_feasible_launchable_resources(
Expand Down
16 changes: 11 additions & 5 deletions sky/provision/gcp/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -672,7 +672,8 @@ def _configure_subnet(region: str, cluster_name: str,
'type': 'ONE_TO_ONE_NAT',
}],
}]
if config.provider_config.get('use_internal_ips', False):
enable_external_ips = _enable_external_ips(config)
if not enable_external_ips:
# Removing this key means the VM will not be assigned an external IP.
default_interfaces[0].pop('accessConfigs')

Expand All @@ -686,14 +687,19 @@ def _configure_subnet(region: str, cluster_name: str,
node_config['networkConfig'] = copy.deepcopy(default_interfaces)[0]
# TPU doesn't have accessConfigs
node_config['networkConfig'].pop('accessConfigs', None)
if config.provider_config.get('use_internal_ips', False):
node_config['networkConfig']['enableExternalIps'] = False
else:
node_config['networkConfig']['enableExternalIps'] = True
node_config['networkConfig']['enableExternalIps'] = enable_external_ips

return config


def _enable_external_ips(config: common.ProvisionConfig) -> bool:
force_enable_external_ips = config.provider_config.get(
'force_enable_external_ips', False)
use_internal_ips = config.provider_config.get('use_internal_ips', False)

return force_enable_external_ips or not use_internal_ips


def _delete_firewall_rule(project_id: str, compute, name):
operation = (compute.firewalls().delete(project=project_id,
firewall=name).execute())
Expand Down
3 changes: 2 additions & 1 deletion sky/templates/gcp-ray.yml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ docker:
provider:
# We use a custom node provider for GCP to create, stop and reuse instances.
type: external # type: gcp
module: sky.skylet.providers.gcp.GCPNodeProvider
module: sky.provision.gcp
region: {{region}}
availability_zone: {{zones}}
# Keep (otherwise cannot reuse when re-provisioning).
Expand All @@ -50,6 +50,7 @@ provider:
firewall_rule: {{firewall_rule}}
{% endif %}
use_internal_ips: {{use_internal_ips}}
force_enable_external_ips: {{force_enable_external_ips}}
{%- if tpu_vm %}
_has_tpus: True
{%- endif %}
Expand Down
3 changes: 3 additions & 0 deletions sky/utils/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,9 @@ def get_config_schema():
}
}
},
'force_enable_external_ips': {
'type': 'boolean'
},
**_LABELS_SCHEMA,
**_NETWORK_CONFIG_SCHEMA,
},
Expand Down
24 changes: 23 additions & 1 deletion tests/test_smoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -797,6 +797,28 @@ def test_gcp_mig():
run_one_test(test)


@pytest.mark.gcp
def test_gcp_force_enable_external_ips():
name = _get_cluster_name()
test_commands = [
f'sky launch -y -c {name} --cloud gcp --cpus 2 tests/test_yamls/minimal.yaml',
# Check network of vm is "default"
(f'gcloud compute instances list --filter=name~"{name}" --format='
'"value(networkInterfaces.network)" | grep "networks/default"'),
# Check External NAT in network access configs, corresponds to external ip
(f'gcloud compute instances list --filter=name~"{name}" --format='
'"value(networkInterfaces.accessConfigs[0].name)" | grep "External NAT"'
),
f'sky down -y {name}',
]
skypilot_config = 'tests/test_yamls/force_enable_external_ips_config.yaml'
test = Test('gcp_force_enable_external_ips',
test_commands,
f'sky down -y {name}',
env={'SKYPILOT_CONFIG': skypilot_config})
run_one_test(test)


@pytest.mark.aws
def test_image_no_conda():
name = _get_cluster_name()
Expand Down Expand Up @@ -3323,7 +3345,7 @@ def _check_replica_in_status(name: str, check_tuples: List[Tuple[int, bool,
"""Check replicas' status and count in sky serve status
We will check vCPU=2, as all our tests use vCPU=2.
Args:
name: the name of the service
check_tuples: A list of replica property to check. Each tuple is
Expand Down
4 changes: 4 additions & 0 deletions tests/test_yamls/force_enable_external_ips_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
gcp:
vpc_name: default
use_internal_ips: true
force_enable_external_ips: true
3 changes: 3 additions & 0 deletions tests/test_yamls/use_internal_ips_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
gcp:
vpc_name: default
use_internal_ips: true

0 comments on commit 38b101d

Please sign in to comment.