Skip to content

Commit

Permalink
Implementing suggestions for NuGet/Home#11027; increases delays and n…
Browse files Browse the repository at this point in the history
…umber of retries for http connection flakiness
  • Loading branch information
erdembayar authored and MattGal committed Aug 25, 2021
1 parent 563216c commit 6f4e808
Show file tree
Hide file tree
Showing 10 changed files with 123 additions and 12 deletions.
1 change: 1 addition & 0 deletions src/NuGet.Core/NuGet.Common/GlobalSuppressions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,4 @@
[assembly: SuppressMessage("Build", "CA2237:Add [Serializable] to CommandLineArgumentCombinationException as this type implements ISerializable", Justification = "<Pending>", Scope = "type", Target = "~T:NuGet.Common.CommandLineArgumentCombinationException")]
[assembly: SuppressMessage("Build", "CA1012:Abstract type LoggerBase should not have constructors", Justification = "<Pending>", Scope = "type", Target = "~T:NuGet.Common.LoggerBase")]
[assembly: SuppressMessage("Build", "CA1815:SearchPathResult should override Equals.", Justification = "<Pending>", Scope = "type", Target = "~T:NuGet.Common.PathResolver.SearchPathResult")]
[assembly: SuppressMessage("Style", "IDE1006:Naming Styles", Justification = "<Pending>", Scope = "member", Target = "~F:NuGet.Common.NuGetEnvironment._getHome")]
26 changes: 25 additions & 1 deletion src/NuGet.Core/NuGet.Common/PathUtil/NuGetEnvironment.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ public static class NuGetEnvironment
#endif

private static readonly Lazy<string> _getHome = new Lazy<string>(() => GetHome());

private static bool? EnhancedHttpRetryIsEnabled = null;
public static string GetFolderPath(NuGetFolderPath folder)
{
switch (folder)
Expand Down Expand Up @@ -333,6 +333,30 @@ public static string GetDotNetLocation()
return DotNet;
}

public static bool EnhancedHttpRetryEnabled
{
get
{
if (EnhancedHttpRetryIsEnabled == null)
{
try
{
EnhancedHttpRetryIsEnabled = false;
var variableValue = Environment.GetEnvironmentVariable("NUGET_ENABLE_ENHANCED_HTTP_RETRY");
if (!string.IsNullOrEmpty(variableValue))
{
if (bool.TryParse(variableValue, out bool parsed))
{
EnhancedHttpRetryIsEnabled = parsed;
}
}
}
catch (Exception) { }
}
return (bool) EnhancedHttpRetryIsEnabled;
}
}

/// <summary>
/// Since <see cref="Environment.SpecialFolder"/> is not available on .NET Core, we have to
/// make our own and re-implement the functionality. On .NET Framework, we can use the
Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
NuGet.Common.NuGetLogCode.NU1703 = 1703 -> NuGet.Common.NuGetLogCode
static NuGet.Common.NuGetEnvironment.EnhancedHttpRetryEnabled.get -> bool
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
NuGet.Common.NuGetLogCode.NU1703 = 1703 -> NuGet.Common.NuGetLogCode
static NuGet.Common.NuGetEnvironment.EnhancedHttpRetryEnabled.get -> bool
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
NuGet.Common.NuGetLogCode.NU1703 = 1703 -> NuGet.Common.NuGetLogCode
static NuGet.Common.NuGetEnvironment.EnhancedHttpRetryEnabled.get -> bool
19 changes: 17 additions & 2 deletions src/NuGet.Core/NuGet.Protocol/HttpSource/HttpRetryHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,24 @@ public async Task<HttpResponseMessage> SendAsync(

while (tries < request.MaxTries && !success)
{
if (tries > 0)
// There are many places where another variable named "MaxTries" is set to 1,
// so the Delay() never actually occurs.
// When opted in to "enhanced retry", do the delay and have it increase exponentially where applicable
// (i.e. when "tries" is allowed to be > 1)
if (tries > 0 || (NuGetEnvironment.EnhancedHttpRetryEnabled && request.IsRetry))
{
await Task.Delay(request.RetryDelay, cancellationToken);
// "Enhanced" retry: In the case where this is actually a 2nd-Nth try, back off with some random.
// In many cases due to the external retry loop, this will be always be 1 * request.RetryDelay.TotalMilliseconds + 0-200 ms
if (NuGetEnvironment.EnhancedHttpRetryEnabled)
{
await Task.Delay(TimeSpan.FromMilliseconds((Math.Pow(2, tries) * request.RetryDelay.TotalMilliseconds) + new Random().Next(200)));

}
// Old behavior; always delay a constant amount
else
{
await Task.Delay(request.RetryDelay, cancellationToken);
}
}

tries++;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using System;
using System.Collections.Generic;
using System.Net.Http;
using NuGet.Common;

namespace NuGet.Protocol
{
Expand All @@ -23,7 +24,14 @@ public HttpRetryHandlerRequest(HttpClient httpClient, Func<HttpRequestMessage> r
CompletionOption = HttpCompletionOption.ResponseHeadersRead;
MaxTries = DefaultMaxTries;
RequestTimeout = TimeSpan.FromSeconds(100);
RetryDelay = TimeSpan.FromMilliseconds(200);
if (NuGetEnvironment.EnhancedHttpRetryEnabled)
{
RetryDelay = TimeSpan.FromMilliseconds(400);
}
else
{
RetryDelay = TimeSpan.FromMilliseconds(200);
}
DownloadTimeout = DefaultDownloadTimeout;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,12 @@ private async Task<ServiceIndexResourceV3> GetServiceIndexResourceV3(
var httpSourceResource = await source.GetResourceAsync<HttpSourceResource>(token);
var client = httpSourceResource.HttpSource;

const int maxRetries = 3;
int maxRetries = 3;
if (NuGetEnvironment.EnhancedHttpRetryEnabled)
{
maxRetries = 6;
}

for (var retry = 1; retry <= maxRetries; retry++)
{
using (var sourceCacheContext = new SourceCacheContext())
Expand Down Expand Up @@ -162,6 +167,19 @@ private async Task<ServiceIndexResourceV3> GetServiceIndexResourceV3(
+ Environment.NewLine
+ ExceptionUtilities.DisplayMessage(ex);
log.LogMinimal(message);

if (NuGetEnvironment.EnhancedHttpRetryEnabled &&
ex.InnerException != null &&
ex.InnerException is IOException &&
ex.InnerException.InnerException != null &&
ex.InnerException.InnerException is System.Net.Sockets.SocketException)
{
// https://github.com/NuGet/Home/issues/11027
// An IO Exception with inner SocketException indicates server hangup ("Connection reset by peer").
// Azure DevOps feeds sporadically do this due to mandatory connection cycling.
// Stalling an five extra seconds gives extra time for the hosts to pick up new session tokens if it occurs.
await Task.Delay(5000);
}
}
catch (Exception ex) when (retry == maxRetries)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ namespace NuGet.Protocol
/// </summary>
public class HttpFileSystemBasedFindPackageByIdResource : FindPackageByIdResource
{
private const int MaxRetries = 3;
private const int DefaultMaxRetries = 3;
private int _maxRetries;
private readonly HttpSource _httpSource;
private readonly ConcurrentDictionary<string, AsyncLazy<SortedDictionary<NuGetVersion, PackageInfo>>> _packageInfoCache =
new ConcurrentDictionary<string, AsyncLazy<SortedDictionary<NuGetVersion, PackageInfo>>>(StringComparer.OrdinalIgnoreCase);
Expand Down Expand Up @@ -69,12 +70,22 @@ public HttpFileSystemBasedFindPackageByIdResource(
}

_baseUris = baseUris
.Take(MaxRetries)
.Take(DefaultMaxRetries)
.Select(uri => uri.OriginalString.EndsWith("/", StringComparison.Ordinal) ? uri : new Uri(uri.OriginalString + "/"))
.ToList();

_httpSource = httpSource;
_nupkgDownloader = new FindPackagesByIdNupkgDownloader(httpSource);

if (NuGetEnvironment.EnhancedHttpRetryEnabled)
{
_maxRetries = 6;
}
else
{
_maxRetries = DefaultMaxRetries;
}

}

/// <summary>
Expand Down Expand Up @@ -446,8 +457,8 @@ private async Task<SortedDictionary<NuGetVersion, PackageInfo>> FindPackagesById
ILogger logger,
CancellationToken cancellationToken)
{
// Try each base URI 3 times.
var maxTries = 3 * _baseUris.Count;
// Try each base URI _maxRetries times.
var maxTries = _maxRetries * _baseUris.Count;
var packageIdLowerCase = id.ToLowerInvariant();

for (var retry = 1; retry <= maxTries; ++retry)
Expand Down Expand Up @@ -497,14 +508,27 @@ private async Task<SortedDictionary<NuGetVersion, PackageInfo>> FindPackagesById
logger,
cancellationToken);
}
catch (Exception ex) when (retry < 3)
catch (Exception ex) when (retry < _maxRetries)
{
var message = string.Format(CultureInfo.CurrentCulture, Strings.Log_RetryingFindPackagesById, nameof(FindPackagesByIdAsync), uri)
+ Environment.NewLine
+ ExceptionUtilities.DisplayMessage(ex);
logger.LogMinimal(message);

if (NuGetEnvironment.EnhancedHttpRetryEnabled &&
ex.InnerException != null &&
ex.InnerException is IOException &&
ex.InnerException.InnerException != null &&
ex.InnerException.InnerException is System.Net.Sockets.SocketException)
{
// https://github.com/NuGet/Home/issues/11027
// An IO Exception with inner SocketException indicates server hangup ("Connection reset by peer").
// Azure DevOps feeds sporadically do this due to mandatory connection cycling.
// Stalling an five extra seconds gives extra time for the hosts to pick up new session tokens if it occurs.
await Task.Delay(5000);
}
}
catch (Exception ex) when (retry == 3)
catch (Exception ex) when (retry == _maxRetries)
{
var message = string.Format(
CultureInfo.CurrentCulture,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,13 @@ private async Task<T> ProcessHttpSourceResultAsync<T>(
ILogger logger,
CancellationToken token)
{
const int maxRetries = 3;
int maxRetries = 3;

if (NuGetEnvironment.EnhancedHttpRetryEnabled)
{
maxRetries = 6;
}

for (var retry = 1; retry <= maxRetries; ++retry)
{
var httpSourceCacheContext = HttpSourceCacheContext.Create(cacheContext, isFirstAttempt: retry == 1);
Expand Down Expand Up @@ -300,6 +306,18 @@ private async Task<T> ProcessHttpSourceResultAsync<T>(
+ ExceptionUtilities.DisplayMessage(ex);

logger.LogMinimal(message);
if (NuGetEnvironment.EnhancedHttpRetryEnabled &&
ex.InnerException != null &&
ex.InnerException is IOException &&
ex.InnerException.InnerException != null &&
ex.InnerException.InnerException is System.Net.Sockets.SocketException)
{
// https://github.com/NuGet/Home/issues/11027
// An IO Exception with inner SocketException indicates server hangup ("Connection reset by peer").
// Azure DevOps feeds sporadically do this due to mandatory connection cycling.
// Stalling an five extra seconds gives extra time for the hosts to pick up new session tokens if it occurs.
await Task.Delay(5000);
}
}
catch (Exception ex) when (retry == maxRetries)
{
Expand Down

0 comments on commit 6f4e808

Please sign in to comment.