Skip to content

Commit 65a69cd

Browse files
authored
[Bug Fix] Refactor Linux container management to avoid race condition that leads the host to initialize placeholder (warmup) function (#10848)
1 parent 8627a33 commit 65a69cd

File tree

9 files changed

+626
-174
lines changed

9 files changed

+626
-174
lines changed

release_notes.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
<!-- Please add your release notes in the following format:
44
- My change description (#PR)
55
-->
6+
67
- Update Java Worker Version to [2.18.1](https://github.com/Azure/azure-functions-java-worker/releases/tag/2.18.1)
78
- Introduced support for response compression, which can be enabled through explicit opt-in (#10870)
89
- Add support for new FeatureFlag `EnableAzureMonitorTimeIsoFormat` to enable ISO time format for Azure Monitor logs on Linux Dedicated/EP SKUs. (#10684)
@@ -14,3 +15,4 @@
1415
- Adding support for faas.invoke_duration metric and other spec related updates (#10929)
1516
- Increased the GC allocation budget value to improve cold start (#10953)
1617
- Fixed bug that could result in "Binding names must be unique" error (#10938)
18+
- Fix a race condition that could lead the host to initialize the placeholder (warmup) function in Linux environments (#10848)

src/WebJobs.Script.WebHost/ContainerManagement/LinuxContainerInitializationHostedService.cs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// Copyright (c) .NET Foundation. All rights reserved.
22
// Licensed under the MIT License. See License.txt in the project root for license information.
33

4+
using System;
45
using System.Threading;
56
using System.Threading.Tasks;
67
using Microsoft.Azure.WebJobs.Script.WebHost.Management;
@@ -51,8 +52,15 @@ private async Task ApplyStartContextIfPresent(CancellationToken cancellationToke
5152
var assignmentContext = _startupContextProvider.SetContext(encryptedAssignmentContext);
5253
await SpecializeMSISideCar(assignmentContext);
5354

54-
bool success = _instanceManager.StartAssignment(assignmentContext);
55-
_logger.LogDebug($"StartAssignment invoked (Success={success})");
55+
try
56+
{
57+
bool success = await _instanceManager.AssignInstanceAsync(assignmentContext);
58+
_logger.LogDebug("AssignInstanceAsync was invoked (Success={success})", success);
59+
}
60+
catch (Exception ex)
61+
{
62+
_logger.LogError(ex, "Failed to assign instance.");
63+
}
5664
}
5765
else
5866
{

src/WebJobs.Script.WebHost/Management/IInstanceManager.cs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,18 @@ public interface IInstanceManager
1313

1414
Task<string> ValidateContext(HostAssignmentContext assignmentContext);
1515

16+
/// <summary>
17+
/// Asynchronously assigns a host instance.
18+
/// </summary>
19+
/// <param name="assignmentContext">The <see cref="HostAssignmentContext"/> that will be applied to the instance being assigned to the application.</param>
20+
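/// <returns>A task whose result is <see langword="true"/> if environment validation succeeds and <paramref name="assignmentContext"/> is accepted (the warmup or assignment is awaited before returning); <see langword="false"/> if environment validation fails or a different context has already been assigned.</returns>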
21+
Task<bool> AssignInstanceAsync(HostAssignmentContext assignmentContext);
22+
23+
/// <summary>
24+
/// Validates the assignment context and begins the assignment process in a "fire and forget" pattern.
25+
/// </summary>
26+
/// <param name="assignmentContext">The <see cref="HostAssignmentContext"/> that will be applied to the instance being assigned to the application.</param>
27+
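/// <returns><see langword="true"/> if environment validation succeeds and the warmup or assignment is started, or if <paramref name="assignmentContext"/> equals the context that was already assigned; otherwise <see langword="false"/>.</returns>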
1628
bool StartAssignment(HostAssignmentContext assignmentContext);
1729

1830
Task<string> SpecializeMSISidecar(HostAssignmentContext assignmentContext);

src/WebJobs.Script.WebHost/Management/LinuxInstanceManager.cs

Lines changed: 81 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,11 @@
33

44
using System;
55
using System.Collections.Generic;
6-
using System.Linq;
76
using System.Net.Http;
8-
using System.Text;
97
using System.Threading.Tasks;
10-
using Microsoft.Azure.Storage;
11-
using Microsoft.Azure.Storage.File;
128
using Microsoft.Azure.WebJobs.Script.Diagnostics;
13-
using Microsoft.Azure.WebJobs.Script.WebHost.Configuration;
14-
using Microsoft.Azure.WebJobs.Script.WebHost.Management.LinuxSpecialization;
159
using Microsoft.Azure.WebJobs.Script.WebHost.Models;
1610
using Microsoft.Extensions.Logging;
17-
using Microsoft.Extensions.Options;
18-
using Newtonsoft.Json;
1911

2012
namespace Microsoft.Azure.WebJobs.Script.WebHost.Management
2113
{
@@ -29,6 +21,7 @@ public abstract class LinuxInstanceManager : IInstanceManager
2921
private readonly IEnvironment _environment;
3022
private readonly HttpClient _client;
3123
private readonly IScriptWebHostEnvironment _webHostEnvironment;
24+
private Task _assignment;
3225

3326
private HostAssignmentContext _assignmentContext;
3427

@@ -44,70 +37,101 @@ public LinuxInstanceManager(IHttpClientFactory httpClientFactory, IScriptWebHost
4437

4538
public abstract Task<string> SpecializeMSISidecar(HostAssignmentContext context);
4639

47-
public bool StartAssignment(HostAssignmentContext context)
40+
public async Task<bool> AssignInstanceAsync(HostAssignmentContext context)
4841
{
49-
if (!_webHostEnvironment.InStandbyMode)
42+
if (!IsValidEnvironment(context))
5043
{
51-
// This is only true when specializing pinned containers.
52-
if (!context.Environment.TryGetValue(EnvironmentSettingNames.ContainerStartContext, out string startContext))
44+
return false;
45+
}
46+
47+
if (context.IsWarmupRequest)
48+
{
49+
await HandleWarmupRequestAsync(context);
50+
return true;
51+
}
52+
53+
lock (_assignmentLock)
54+
{
55+
if (_assignmentContext == null)
56+
{
57+
_assignmentContext = context;
58+
_assignment = AssignAsync(context);
59+
}
60+
else if (!_assignmentContext.Equals(context))
5361
{
54-
_logger.LogError("Assign called while host is not in placeholder mode and start context is not present.");
5562
return false;
5663
}
5764
}
5865

59-
if (_environment.IsContainerReady())
66+
await _assignment;
67+
return true;
68+
}
69+
70+
public bool StartAssignment(HostAssignmentContext context)
71+
{
72+
if (!IsValidEnvironment(context))
6073
{
61-
_logger.LogError("Assign called while container is marked as specialized.");
6274
return false;
6375
}
6476

6577
if (context.IsWarmupRequest)
6678
{
67-
// Based on profiling download code jit-ing holds up cold start.
68-
// Pre-jit to avoid paying the cost later.
69-
Task.Run(async () => await DownloadWarmupAsync(context.GetRunFromPkgContext()));
79+
Task.Run(async () => await HandleWarmupRequestAsync(context));
7080
return true;
7181
}
72-
else if (_assignmentContext == null)
82+
83+
lock (_assignmentLock)
7384
{
74-
lock (_assignmentLock)
85+
if (_assignmentContext != null)
7586
{
76-
if (_assignmentContext != null)
77-
{
78-
return _assignmentContext.Equals(context);
79-
}
80-
_assignmentContext = context;
87+
return _assignmentContext.Equals(context);
8188
}
89+
_assignmentContext = context;
90+
_assignment = AssignAsync(context);
91+
}
8292

83-
_logger.LogInformation($"Starting Assignment. Cloud Name: {_environment.GetCloudName()}");
84-
85-
// set a flag which will cause any incoming http requests to buffer
86-
// until specialization is complete
87-
// the host is guaranteed not to receive any requests until AFTER assign
88-
// has been initiated, so setting this flag here is sufficient to ensure
89-
// that any subsequent incoming requests while the assign is in progress
90-
// will be delayed until complete
91-
_webHostEnvironment.DelayRequests();
93+
return true;
94+
}
9295

93-
// start the specialization process in the background
94-
Task.Run(async () => await AssignAsync(context));
96+
public abstract Task<string> ValidateContext(HostAssignmentContext assignmentContext);
9597

96-
return true;
98+
private bool IsValidEnvironment(HostAssignmentContext context)
99+
{
100+
if (!_webHostEnvironment.InStandbyMode)
101+
{
102+
// This is only true when specializing pinned containers.
103+
if (!context.Environment.TryGetValue(EnvironmentSettingNames.ContainerStartContext, out string startContext))
104+
{
105+
_logger.LogError("Assign called while host is not in placeholder mode and start context is not present.");
106+
return false;
107+
}
97108
}
98-
else
109+
110+
if (_environment.IsContainerReady())
99111
{
100-
// No lock needed here since _assignmentContext is not null when we are here
101-
return _assignmentContext.Equals(context);
112+
_logger.LogError("Assign called while container is marked as specialized.");
113+
return false;
102114
}
103-
}
104115

105-
public abstract Task<string> ValidateContext(HostAssignmentContext assignmentContext);
116+
return true;
117+
}
106118

107119
private async Task AssignAsync(HostAssignmentContext assignmentContext)
108120
{
121+
await Task.Yield(); // AssignAsync may be invoked from within a lock. Yielding here returns control to the caller immediately, so the caller can exit the lock scope while the rest of the assignment continues asynchronously.
122+
109123
try
110124
{
125+
_logger.LogInformation($"Starting Assignment. Cloud Name: {_environment.GetCloudName()}");
126+
127+
// set a flag which will cause any incoming http requests to buffer
128+
// until specialization is complete
129+
// the host is guaranteed not to receive any requests until AFTER assign
130+
// has been initiated, so setting this flag here is sufficient to ensure
131+
// that any subsequent incoming requests while the assign is in progress
132+
// will be delayed until complete
133+
_webHostEnvironment.DelayRequests();
134+
111135
// first make all environment and file system changes required for
112136
// the host to be specialized
113137
_logger.LogInformation("Applying {environmentCount} app setting(s)", assignmentContext.Environment.Count);
@@ -133,6 +157,21 @@ private async Task AssignAsync(HostAssignmentContext assignmentContext)
133157
}
134158
}
135159

160+
private async Task HandleWarmupRequestAsync(HostAssignmentContext assignmentContext)
161+
{
162+
try
163+
{
164+
await DownloadWarmupAsync(assignmentContext.GetRunFromPkgContext());
165+
}
166+
catch (Exception ex)
167+
{
168+
_logger.LogError(ex, "Warmup download failed");
169+
await _meshServiceClient.NotifyHealthEvent(ContainerHealthEventType.Warning, GetType(), "Warmup download failed");
170+
throw;
171+
}
172+
return;
173+
}
174+
136175
protected abstract Task ApplyContextAsync(HostAssignmentContext assignmentContext);
137176

138177
protected abstract Task<string> DownloadWarmupAsync(RunFromPackageContext context);

0 commit comments

Comments
 (0)