Skip to content
This repository was archived by the owner on Dec 9, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion perfzero/lib/perfzero/perfzero_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ def add_benchmark_parser_arguments(parser):
default=None,
type=str,
help='''A json dictionary of cloud tpu parameters. The format must look like the following:
{"name": "my-tpu-name", "project": "my-gcp-project-id", "zone": "europe-west4-a", "size": "v3-8", "version": "nightly-2.x"}
{"name": "my-tpu-name", "using_prestarted_tpu": "true/false", "project": "my-gcp-project-id", "zone": "europe-west4-a", "size": "v3-8", "version": "nightly-2.x"}
It can have an optional key value pair "version_id" -> "nightly version" to change the tpu version id.
Example "version_id": "2.4.0-dev20200728".
''')
Expand Down
4 changes: 2 additions & 2 deletions perfzero/lib/perfzero/tpu_runtime_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@ def _configure_tpu_version(tpu_name, version_label, new_version_id):
logging.info('Trying to reset tpu version to %s', new_version_id)
tpu_client.configure_tpu_version(version=new_version_id)
tpu_client.wait_for_healthy()
logging.info('TPU healthy after version reset.')
logging.info('TPU healthy after version reset. New version id: %s', new_version_id)
else:
logging.info('Using the default tpu version id.')
logging.info('Using the default or pre-started tpu version id.')

workers = tpu_client.network_endpoints()
if workers:
Expand Down
12 changes: 11 additions & 1 deletion perfzero/lib/perfzero/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,11 @@ def setup_tpu(parameters):
Returns:
True if an error occurs during setup.
"""
# Skip creating tpu if using a prestarted tpu.
if parameters.get('using_prestarted_tpu') == 'true':
logging.info('Skip creating TPU since the prestarted TPU %s is being used.',
parameters.get('name'))
return False
try:
base_cmd = 'gcloud compute tpus execution-groups create'
args = [
Expand Down Expand Up @@ -437,7 +442,12 @@ def cleanup_tpu(parameters):
Returns:
True if an error occurs during cleanup.
"""

# Skip cleaning up the tpu if using a prestarted tpu.
if parameters.get('using_prestarted_tpu') == 'true':
logging.info('Skip cleaning up TPU since the prestarted TPU %s is being used.',
parameters.get('name'))
return False

base_cmd = 'gcloud compute tpus execution-groups delete'

args = [
Expand Down