Skip to content

Only start flush logs timer when there's an event to write #11100

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jun 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions release_notes.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@
- Memory allocation optimizations in `FeatureFlags.IsEnabled` by adopting zero-allocation `ContainsToken` for efficient delimited token search (#11075)
- Improvements to coldstart pipeline (#11102).
- Update Python Worker Version to [4.38.0](https://github.com/Azure/azure-functions-python-worker/releases/tag/4.38.0)
- Only start the Diagnostic Events flush logs timer when events are present, preventing unnecessary flush attempts (#11100).
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,16 @@ private static class Logger
LoggerMessage.Define<string>(LogLevel.Debug, new EventId(6, nameof(DeletingTableWithOutdatedEventVersion)), "Deleting table '{tableName}' as it contains records with an outdated EventVersion.");

private static readonly Action<ILogger, Exception> _errorPurgingDiagnosticEventVersions =
LoggerMessage.Define(LogLevel.Error, new EventId(7, nameof(ErrorPurgingDiagnosticEventVersions)), "Error occurred when attempting to purge previous diagnostic event versions.");
LoggerMessage.Define(LogLevel.Warning, new EventId(7, nameof(ErrorPurgingDiagnosticEventVersions)), "Error occurred when attempting to purge previous diagnostic event versions.");

private static readonly Action<ILogger, Exception> _unableToGetTableReference =
LoggerMessage.Define(LogLevel.Error, new EventId(8, nameof(UnableToGetTableReference)), "Unable to get table reference. Aborting write operation.");
LoggerMessage.Define(LogLevel.Warning, new EventId(8, nameof(UnableToGetTableReference)), "Unable to get table reference. Aborting write operation.");

private static readonly Action<ILogger, Exception> _unableToGetTableReferenceOrCreateTable =
LoggerMessage.Define(LogLevel.Error, new EventId(9, nameof(UnableToGetTableReferenceOrCreateTable)), "Unable to get table reference or create table. Aborting write operation.");
LoggerMessage.Define(LogLevel.Warning, new EventId(9, nameof(UnableToGetTableReferenceOrCreateTable)), "Unable to get table reference or create table. Aborting write operation.");

private static readonly Action<ILogger, Exception> _unableToWriteDiagnosticEvents =
LoggerMessage.Define(LogLevel.Error, new EventId(10, nameof(UnableToWriteDiagnosticEvents)), "Unable to write diagnostic events to table storage.");
LoggerMessage.Define(LogLevel.Warning, new EventId(10, nameof(UnableToWriteDiagnosticEvents)), "Unable to write diagnostic events to table storage.");

private static readonly Action<ILogger, Exception> _primaryHostStateProviderNotAvailable =
LoggerMessage.Define(LogLevel.Debug, new EventId(11, nameof(PrimaryHostStateProviderNotAvailable)), "PrimaryHostStateProvider is not available. Skipping the check for primary host.");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public partial class DiagnosticEventTableStorageRepository : IDiagnosticEventRep
private const int LogFlushInterval = 1000 * 60 * 10; // 10 minutes
private const int TableCreationMaxRetryCount = 5;

private readonly Timer _flushLogsTimer;
private readonly Lazy<Timer> _flushLogsTimer;
private readonly IHostIdProvider _hostIdProvider;
private readonly IEnvironment _environment;
private readonly IAzureTableStorageProvider _azureTableStorageProvider;
Expand All @@ -50,7 +50,7 @@ internal DiagnosticEventTableStorageRepository(IHostIdProvider hostIdProvider, I
_environment = environment;
_serviceProvider = scriptHostManager as IServiceProvider;
_logger = logger;
_flushLogsTimer = new Timer(OnFlushLogs, null, logFlushInterval, logFlushInterval);
_flushLogsTimer = new Lazy<Timer>(() => new Timer(OnFlushLogs, null, logFlushInterval, logFlushInterval));
_azureTableStorageProvider = azureTableStorageProvider;
}

Expand Down Expand Up @@ -107,7 +107,7 @@ internal string HostId
{
get
{
if (!_environment.IsPlaceholderModeEnabled() && string.IsNullOrEmpty(_hostId))
if (string.IsNullOrEmpty(_hostId) && !_environment.IsPlaceholderModeEnabled())
{
_hostId = _hostIdProvider?.GetHostIdAsync(CancellationToken.None).GetAwaiter().GetResult();
}
Expand Down Expand Up @@ -192,9 +192,23 @@ await Utility.InvokeWithRetriesAsync(async () =>

_purged = true;
}
catch (RequestFailedException ex) when (ex.Status == (int)HttpStatusCode.Forbidden)
{
// If we reach this point, we already checked for permissions on TableClient initialization.
// It is possible that the permissions changed after the initialization, any firewall/network rules were changed or it's a custom role where we don't have permissions to query entities.
// We will log the error and disable the service.
Logger.ErrorPurgingDiagnosticEventVersions(_logger, ex);
DisableService();
Logger.ServiceDisabledUnauthorizedClient(_logger, ex);
}
catch (Exception ex)
{
// We failed to connect to the table storage account. This could be due to a transient error or a configuration issue (e.g., network problems).
// To avoid repeatedly retrying in a potentially unhealthy state, we will disable the service.
// The operation may succeed in a future instance if the underlying issue is resolved.
Logger.ErrorPurgingDiagnosticEventVersions(_logger, ex);
DisableService();
Logger.ServiceDisabledUnableToConnectToStorage(_logger, ex);
}
}, maxRetries: 5, retryInterval: TimeSpan.FromSeconds(5));

Expand Down Expand Up @@ -245,12 +259,16 @@ internal virtual async Task FlushLogs(TableClient table = null)
Logger.UnableToGetTableReferenceOrCreateTable(_logger, ex);
DisableService();
Logger.ServiceDisabledUnauthorizedClient(_logger, ex);
return;
}
catch (Exception ex)
{
// We failed to connect to the table storage account. This could be due to a transient error or a configuration issue (e.g., network problems).
// To avoid repeatedly retrying in a potentially unhealthy state, we will disable the service.
// The operation may succeed in a future instance if the underlying issue is resolved.
Logger.UnableToGetTableReferenceOrCreateTable(_logger, ex);
// Clearing the memory cache to avoid memory build up.
_events.Clear();
DisableService();
Logger.ServiceDisabledUnableToConnectToStorage(_logger, ex);
return;
}

Expand Down Expand Up @@ -292,12 +310,14 @@ internal async Task ExecuteBatchAsync(ConcurrentDictionary<string, DiagnosticEve
catch (Exception ex)
{
Logger.UnableToWriteDiagnosticEvents(_logger, ex);
DisableService();
Logger.ServiceDisabledUnableToConnectToStorage(_logger, ex);
}
}

public void WriteDiagnosticEvent(DateTime timestamp, string errorCode, LogLevel level, string message, string helpLink, Exception exception)
{
if (TableClient is null || string.IsNullOrEmpty(HostId))
if (TableClient is null || string.IsNullOrEmpty(HostId) || !IsEnabled())
{
return;
}
Expand All @@ -320,6 +340,18 @@ public void WriteDiagnosticEvent(DateTime timestamp, string errorCode, LogLevel
_events[errorCode].HitCount++;
}
}

EnsureFlushLogsTimerInitialized();
}

internal void EnsureFlushLogsTimerInitialized()
{
if (_disposed || !IsEnabled())
{
return;
}

_ = _flushLogsTimer.Value;
}

public bool IsEnabled()
Expand All @@ -341,8 +373,13 @@ private bool IsPrimaryHost()

private void StopTimer()
{
if (!_flushLogsTimer.IsValueCreated)
{
return;
}

Logger.StoppingFlushLogsTimer(_logger);
_flushLogsTimer?.Change(Timeout.Infinite, Timeout.Infinite);
_flushLogsTimer?.Value?.Change(Timeout.Infinite, Timeout.Infinite);
}

protected virtual void Dispose(bool disposing)
Expand All @@ -351,9 +388,9 @@ protected virtual void Dispose(bool disposing)
{
if (disposing)
{
if (_flushLogsTimer != null)
if (_flushLogsTimer?.Value != null)
{
_flushLogsTimer.Dispose();
_flushLogsTimer?.Value?.Dispose();
}

FlushLogs().GetAwaiter().GetResult();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ public async Task TimerFlush_CalledOnExpectedInterval()

DiagnosticEventTableStorageRepository repository = mockDiagnosticEventTableStorageRepository.Object;

// Write a diagnostic event to trigger the timer
mockDiagnosticEventTableStorageRepository.Object.WriteDiagnosticEvent(DateTime.UtcNow, "eh1", LogLevel.Information, "This is the message", "https://fwlink/", new Exception("exception message"));

int numFlushes = 0;
mockDiagnosticEventTableStorageRepository.Protected().Setup("OnFlushLogs", ItExpr.IsAny<object>())
.Callback<object>((state) =>
Expand All @@ -81,6 +84,27 @@ public async Task TimerFlush_CalledOnExpectedInterval()
mockDiagnosticEventTableStorageRepository.VerifyAll();
}

[Fact]
public void TimerFlush_NotStartedWithRepositoryDisabled()
{
IEnvironment testEnvironment = new TestEnvironment();
testEnvironment.SetEnvironmentVariable(EnvironmentSettingNames.AzureWebsitePlaceholderMode, "0");

var configuration = new ConfigurationBuilder().Build();
var localStorageProvider = TestHelpers.GetAzureTableStorageProvider(configuration);
DiagnosticEventTableStorageRepository repository =
new DiagnosticEventTableStorageRepository(_hostIdProvider, testEnvironment, _scriptHostMock.Object, localStorageProvider, _logger);

repository.WriteDiagnosticEvent(DateTime.UtcNow, "eh1", LogLevel.Information, "This is the message", "https://fwlink/", new Exception("exception message"));

var flushLogsTimerField = typeof(DiagnosticEventTableStorageRepository)
.GetField("_flushLogsTimer", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance);
var lazyTimer = flushLogsTimerField.GetValue(repository) as Lazy<Timer>;

Assert.False(repository.IsEnabled());
Assert.False(lazyTimer.IsValueCreated, "Timer should not be created when repository is disabled");
}

[Fact]
public void WriteDiagnostic_LogsError_WhenHostIdNotSet()
{
Expand Down
Loading