Skip to content
This repository was archived by the owner on Dec 9, 2024. It is now read-only.

Commit d0d6a01

Browse files
authored
Skip creating and cleaning up TPUs if using a prestarted TPU (#522)
* Skip creating and cleaning up TPUs if using a prestarted TPU.
* Add more logging.
* Log new version id.
1 parent 199ec02 commit d0d6a01

File tree

3 files changed

+14
-4
lines changed

3 files changed

+14
-4
lines changed

perfzero/lib/perfzero/perfzero_config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ def add_benchmark_parser_arguments(parser):
260260
default=None,
261261
type=str,
262262
help='''A json dictionary of cloud tpu parameters. The format must look like the following:
263-
{"name": "my-tpu-name", project": "my-gcp-project-id", "zone": "europe-west4-a", "size": "v3-8", "version": "nightly-2.x"}
263+
{"name": "my-tpu-name", "using_prestarted_tpu": "true/false", "project": "my-gcp-project-id", "zone": "europe-west4-a", "size": "v3-8", "version": "nightly-2.x"}
264264
It can have an optional key value pair "version_id" -> "nightly version" to change the tpu version id.
265265
Example "version_id": "2.4.0-dev20200728".
266266
''')

perfzero/lib/perfzero/tpu_runtime_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,9 @@ def _configure_tpu_version(tpu_name, version_label, new_version_id):
6363
logging.info('Trying to reset tpu version to %s', new_version_id)
6464
tpu_client.configure_tpu_version(version=new_version_id)
6565
tpu_client.wait_for_healthy()
66-
logging.info('TPU healthy after version reset.')
66+
logging.info('TPU healthy after version reset. New version id: %s', new_version_id)
6767
else:
68-
logging.info('Using the default tpu version id.')
68+
logging.info('Using the default or pre-started tpu version id.')
6969

7070
workers = tpu_client.network_endpoints()
7171
if workers:

perfzero/lib/perfzero/utils.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,11 @@ def setup_tpu(parameters):
407407
Returns:
408408
True if an error occurs during setup.
409409
"""
410+
# Skip creating tpu if using a prestarted tpu.
411+
if parameters.get('using_prestarted_tpu') == 'true':
412+
logging.info('Skip creating TPU since the prestarted TPU %s is being used.',
413+
parameters.get('name'))
414+
return False
410415
try:
411416
base_cmd = 'gcloud compute tpus execution-groups create'
412417
args = [
@@ -437,7 +442,12 @@ def cleanup_tpu(parameters):
437442
Returns:
438443
True if an error occurs during cleanup.
439444
"""
440-
445+
# Skip cleaning up the tpu if using a prestarted tpu.
446+
if parameters.get('using_prestarted_tpu') == 'true':
447+
logging.info('Skip cleaning up TPU since the prestarted TPU %s is being used.',
448+
parameters.get('name'))
449+
return False
450+
441451
base_cmd = 'gcloud compute tpus execution-groups delete'
442452

443453
args = [

0 commit comments

Comments
 (0)