13
13
DEFAULT_PROFILE = 'default' # name of awscli / boto3 profile to target
14
14
DEFAULT_CLUSTER = 'test-ecs-cluster' # name of ECS cluster to target
15
15
DEFAULT_ACTION = 'replace' # 'reboot' or 'replace'
16
+ DEFAULT_WAIT = 30 # base wait, in seconds, for the countdown timers (default of --wait)
16
17
18
# Effective base wait used by the countdown calls; rebound to the --wait value
# once the command line has been parsed. Callers scale it (x2, x3, /2).
# NOTE(review): WAIT_TIME / 2 is a float; countdown() loops on `while t` with
# `t -= 1`, so an odd base value never hits exactly 0 - consider WAIT_TIME // 2.
+ WAIT_TIME = DEFAULT_WAIT
17
19
18
20
# Field order matters: positions are resolved with INSTANCE_FIELDS.index(...).
INSTANCE_FIELDS = ['ec2InstanceId' , 'containerInstanceArn' , 'status' , 'runningTasksCount' , 'pendingTasksCount' ]
19
21
@@ -34,7 +36,7 @@ def countdown(msg, t):
34
36
print ('{}...' .format (msg ))
35
37
while t :
36
38
mins , secs = divmod (t , 60 )
37
- timeformat = '{:02d}:{:02d}' .format (mins , secs )
39
+ timeformat = '{:02d}:{:02d}' .format (int ( mins ), int ( secs ) )
38
40
print (timeformat , end = '\r ' )
39
41
sleep (1 )
40
42
t -= 1
@@ -106,8 +108,8 @@ def set_scalein_protection_for_instances(as_client, asg, cluster_instances, prot
106
108
)
107
109
108
110
109
- def wait_until_instance_count (ecs_client , target_cluster , count , seconds = 60 ):
110
- countdown ('Waiting for cluster size change (expected instance count: {})' .format (count ), seconds )
111
+ def wait_until_instance_count (ecs_client , target_cluster , count , seconds = WAIT_TIME ):
112
+ countdown ('Waiting for cluster size change (expected instance count: {})' .format (count ), seconds * 3 )
111
113
current_instances = get_cluster_instances (ecs_client , target_cluster )
112
114
if len (current_instances ) != count :
113
115
yes_or_exit ('There are currently {} instances, but expecting {} - keep waiting?' .format (
@@ -117,7 +119,7 @@ def wait_until_instance_count(ecs_client, target_cluster, count, seconds=60):
117
119
118
120
119
121
def wait_until_instance_status (ecs_client , target_cluster , instance_id , status ):
120
- countdown ('Waiting for instance {} to have {} status' .format (instance_id , status ), 30 )
122
+ countdown ('Waiting for instance {} to have {} status' .format (instance_id , status ), WAIT_TIME / 2 )
121
123
current_instances = get_cluster_instances (ecs_client , target_cluster )
122
124
for instance in current_instances :
123
125
if instance [INSTANCE_FIELDS .index ('ec2InstanceId' )] == instance_id :
@@ -166,7 +168,7 @@ def wait_until_instance_drained(ecs_client, target_cluster, instance_id):
166
168
running = len (response ['taskArns' ])
167
169
drained = (running == 0 )
168
170
if not drained :
169
- countdown ('Waiting for instance {} to drain; currently running {} tasks' .format (instance_id , running ), 60 )
171
+ countdown ('Waiting for instance {} to drain; currently running {} tasks' .format (instance_id , running ), WAIT_TIME )
170
172
171
173
172
174
def wait_until_instance_ec2_ok (ec2_client , ec2_instance_id ):
@@ -176,7 +178,7 @@ def wait_until_instance_ec2_ok(ec2_client, ec2_instance_id):
176
178
status = response ['InstanceStatuses' ][0 ]['InstanceStatus' ]['Status' ]
177
179
ok = (status == 'ok' )
178
180
if not ok :
179
- countdown ('Waiting for instance {} to be \' ok\' ; currently \' {}\' ' .format (ec2_instance_id , status ), 60 )
181
+ countdown ('Waiting for instance {} to be \' ok\' ; currently \' {}\' ' .format (ec2_instance_id , status ), WAIT_TIME )
180
182
181
183
182
184
def wait_until_instance_ecs_connected (ecs_client , ecs_instance_id , target_cluster ):
@@ -252,7 +254,7 @@ def do_cluster_replace(profile, target_cluster):
252
254
i + 1 , len (cluster_instances ), ec2_instance_id , ecs_instance_id
253
255
))
254
256
255
- countdown ('Waiting for ASG to rightsize ECS cluster' , 60 )
257
+ countdown ('Waiting for ASG to rightsize ECS cluster' , WAIT_TIME )
256
258
wait_until_instance_count (ecs_client , target_cluster , len (cluster_instances ) + 1 )
257
259
new_instance = get_new_instance (
258
260
cluster_instances , replacement_instances , get_cluster_instances (ecs_client , target_cluster )
@@ -276,12 +278,12 @@ def do_cluster_replace(profile, target_cluster):
276
278
if i < (len (cluster_instances ) - 1 ):
277
279
# terminate an original instance
278
280
ec2_client .terminate_instances (InstanceIds = [ec2_instance_id , ])
279
- countdown ('Terminating original instance {} [{}]' .format (ec2_instance_id , ecs_instance_id ), 60 )
281
+ countdown ('Terminating original instance {} [{}]' .format (ec2_instance_id , ecs_instance_id ), WAIT_TIME )
280
282
else :
281
283
# for the final instance, just downsize cluster & let AS / ECS handle it
282
284
set_scalein_protection_for_instances (as_client , asg , replacement_instances , True )
283
285
bump_autoscaling_group (as_client , asg , - 1 )
284
- countdown ('Returned to original ASG size, waiting for ASG to downsize ECS cluster' , 120 )
286
+ countdown ('Returned to original ASG size, waiting for ASG to downsize ECS cluster' , WAIT_TIME * 2 )
285
287
wait_until_instance_count (ecs_client , target_cluster , len (cluster_instances ))
286
288
set_scalein_protection_for_instances (as_client , asg , replacement_instances , False )
287
289
@@ -298,7 +300,7 @@ def do_cluster_reboot(profile, target_cluster):
298
300
299
301
print ('Increasing ASG size by 1 to maintain cluster capacity during rolling reboot' )
300
302
bump_autoscaling_group (as_client , asg , 1 )
301
- countdown ('Waiting for ASG to upsize ECS cluster' , 60 )
303
+ countdown ('Waiting for ASG to upsize ECS cluster' , WAIT_TIME )
302
304
# wait until the additional instance joins the cluster
303
305
wait_until_instance_count (ecs_client , target_cluster , len (cluster_instances ) + 1 )
304
306
@@ -315,12 +317,12 @@ def do_cluster_reboot(profile, target_cluster):
315
317
wait_until_instance_drained (ecs_client , target_cluster , ecs_instance_id )
316
318
# 1st reboot instance (this picks up any unapplied security updates when it boots)
317
319
ec2_client .reboot_instances (InstanceIds = [ec2_instance_id , ])
318
- countdown ('Reboot (1/2) for instance {} [{}]' .format (ec2_instance_id , ecs_instance_id ), 60 )
320
+ countdown ('Reboot (1/2) for instance {} [{}]' .format (ec2_instance_id , ecs_instance_id ), WAIT_TIME )
319
321
wait_until_instance_ec2_ok (ec2_client , ec2_instance_id )
320
322
wait_until_instance_ecs_connected (ecs_client , ecs_instance_id , target_cluster )
321
323
# 2nd reboot of instance (boots to new kernel, if it was updated)
322
324
ec2_client .reboot_instances (InstanceIds = [ec2_instance_id , ])
323
- countdown ('Reboot (2/2) for instance {} [{}]' .format (ec2_instance_id , ecs_instance_id ), 60 )
325
+ countdown ('Reboot (2/2) for instance {} [{}]' .format (ec2_instance_id , ecs_instance_id ), WAIT_TIME )
324
326
wait_until_instance_ec2_ok (ec2_client , ec2_instance_id )
325
327
wait_until_instance_ecs_connected (ecs_client , ecs_instance_id , target_cluster )
326
328
# mark as ACTIVE and verify that it is
@@ -346,7 +348,7 @@ def do_cluster_reboot(profile, target_cluster):
346
348
# downsize cluster & wait until overflow instance is gone
347
349
set_scalein_protection_for_instances (as_client , asg , cluster_instances , True )
348
350
bump_autoscaling_group (as_client , asg , - 1 )
349
- countdown ('Returned to original ASG size, waiting for ASG to downsize ECS cluster' , 60 )
351
+ countdown ('Returned to original ASG size, waiting for ASG to downsize ECS cluster' , WAIT_TIME )
350
352
wait_until_instance_count (ecs_client , target_cluster , len (cluster_instances ))
351
353
set_scalein_protection_for_instances (as_client , asg , cluster_instances , False )
352
354
@@ -365,6 +367,10 @@ def do_cluster_reboot(profile, target_cluster):
365
367
'--profile' , '-p' , nargs = '?' , default = DEFAULT_PROFILE ,
366
368
help = 'Name of AWS profile to target (default: \' {0}\' )' .format (DEFAULT_PROFILE )
367
369
)
370
+ parser .add_argument (
371
+ '--wait' , '-w' , nargs = '?' , default = DEFAULT_WAIT , type = int ,
372
+ help = 'Base for timer to wait between actions (default: \' {0}\' )' .format (DEFAULT_WAIT )
373
+ )
368
374
parser .add_argument (
369
375
'--provider' , '-r' , nargs = '?' , default = DEFAULT_PROVIDER , choices = [PROVIDER_PROFILE , PROVIDER_ENV ],
370
376
help = 'AWS credential provider method to use (default: \' {0}\' , choose from [\' {1}\' ,\' {2}\' ])' .format (
@@ -376,6 +382,8 @@ def do_cluster_reboot(profile, target_cluster):
376
382
)
377
383
args = parser .parse_args ()
378
384
385
# Apply the user-supplied base wait to the module-level setting.
# NOTE(review): default argument values are evaluated when a function is
# defined, so wait_until_instance_count(..., seconds=WAIT_TIME) keeps the
# value WAIT_TIME had at definition time and ignores this rebinding; only code
# that reads WAIT_TIME at call time sees args.wait.
# NOTE(review): this rebinds the global only if the statement runs at module
# level; inside a function it would need `global WAIT_TIME` - confirm.
+ WAIT_TIME = args .wait
386
+
379
387
if args .provider == PROVIDER_PROFILE :
380
388
session = boto3 .Session ()
381
389
if args .profile not in session .available_profiles :
0 commit comments