Skip to content

Updating startup failure logging. #11055

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 11 additions & 10 deletions release_notes.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
### Release notes

<!-- Please add your release notes in the following format:
- My change description (#PR)
-->
- Memory allocation optimizations in `ScriptStartupTypeLocator.GetExtensionsStartupTypesAsync` (#11012)
- Fix invocation timeout when incoming request contains "x-ms-invocation-id" header (#10980)
- Warn if .azurefunctions folder does not exist (#10967)
- Memory allocation & CPU optimizations in `GrpcMessageExtensionUtilities.ConvertFromHttpMessageToExpando` (#11054)
- Memory allocation optimizations in `ReadLanguageWorkerFile` by reading files in buffered chunks, preventing LOH allocations (#11069)
### Release notes

<!-- Please add your release notes in the following format:
- My change description (#PR)
-->
- Memory allocation optimizations in `ScriptStartupTypeLocator.GetExtensionsStartupTypesAsync` (#11012)
- Fix invocation timeout when incoming request contains "x-ms-invocation-id" header (#10980)
- Warn if .azurefunctions folder does not exist (#10967)
- Memory allocation & CPU optimizations in `GrpcMessageExtensionUtilities.ConvertFromHttpMessageToExpando` (#11054)
- Memory allocation optimizations in `ReadLanguageWorkerFile` by reading files in buffered chunks, preventing LOH allocations (#11069)
- Enhancing the capability to send startup failure logs to AppInsights/Otel. (#11055)
65 changes: 65 additions & 0 deletions src/WebJobs.Script.WebHost/Diagnostics/CompositeLogger.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the MIT License. See License.txt in the project root for license information.

using System;
using System.Linq;
using Microsoft.Extensions.Logging;

namespace Microsoft.Azure.WebJobs.Script.WebHost.Diagnostics
{
internal sealed class CompositeLogger : ILogger
{
private readonly ILogger[] _loggers;

public CompositeLogger(params ILogger[] loggers)
{
_loggers = loggers is { Length: > 0 } ? loggers : throw new ArgumentNullException(nameof(loggers));
}

public IDisposable BeginScope<TState>(TState state)
{
// Create a composite scope that wraps scopes from all loggers
var scopes = new IDisposable[_loggers.Length];
for (int i = 0; i < _loggers.Length; i++)
{
scopes[i] = _loggers[i].BeginScope(state);
}

return new CompositeScope(scopes);
}

public bool IsEnabled(LogLevel logLevel)
{
return _loggers.Any(l => l.IsEnabled(logLevel));
}

public void Log<TState>(
LogLevel logLevel, EventId eventId, TState state,
Exception exception, Func<TState, Exception, string> formatter)
{
foreach (var logger in _loggers)
{
if (logger.IsEnabled(logLevel))
{
logger.Log(logLevel, eventId, state, exception, formatter);
}
}
}

private sealed class CompositeScope : IDisposable
{
private readonly IDisposable[] _scopes;

public CompositeScope(IDisposable[] scopes)
=> _scopes = scopes;

public void Dispose()
{
foreach (ref readonly var scope in _scopes.AsSpan())
{
scope?.Dispose();
}
}
}
}
}
109 changes: 105 additions & 4 deletions src/WebJobs.Script.WebHost/WebJobsScriptHostService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@
using System.Collections.ObjectModel;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Threading;
using System.Threading.Tasks;
using Azure.Identity;
using Azure.Monitor.OpenTelemetry.Exporter;
using Microsoft.ApplicationInsights.AspNetCore;
using Microsoft.ApplicationInsights.AspNetCore.Extensions;
using Microsoft.ApplicationInsights.DependencyCollector;
Expand Down Expand Up @@ -36,6 +39,7 @@
using OpenTelemetry.Metrics;
using OpenTelemetry.Trace;
using static Microsoft.Azure.WebJobs.Script.EnvironmentSettingNames;
using AppInsightsCredentialOptions = Microsoft.Azure.WebJobs.Logging.ApplicationInsights.TokenCredentialOptions;
using IApplicationLifetime = Microsoft.AspNetCore.Hosting.IApplicationLifetime;

namespace Microsoft.Azure.WebJobs.Script.WebHost
Expand Down Expand Up @@ -224,7 +228,7 @@ public async Task StartAsync(CancellationToken cancellationToken)
}
}

private void CheckFileSystem()
private async Task CheckFileSystemAsync()
{
if (_environment.ZipDeploymentAppSettingsExist())
{
Expand All @@ -247,7 +251,10 @@ private void CheckFileSystem()
}
finally
{
_logger.LogError(errorPrefix + errorSuffix);
var errorMessage = $"{errorPrefix}{errorSuffix}";

_logger.LogError(errorMessage);
await LogErrorWithTransientOtelLoggerAsync(errorMessage);
}
_applicationLifetime.StopApplication();
}
Expand Down Expand Up @@ -319,7 +326,7 @@ private async Task StartHostAsync(CancellationToken cancellationToken, int attem
/// </summary>
private async Task UnsynchronizedStartHostAsync(ScriptHostStartupOperation activeOperation, int attemptCount = 0, JobHostStartupMode startupMode = JobHostStartupMode.Normal)
{
CheckFileSystem();
await CheckFileSystemAsync();
if (ShutdownRequested)
{
return;
Expand Down Expand Up @@ -736,7 +743,20 @@ private ILogger GetHostLogger(IHost host)

// Attempt to get the host logger with JobHost configuration applied
// using the default logger as a fallback
return hostLoggerFactory?.CreateLogger(LogCategories.Startup) ?? _logger;
if (hostLoggerFactory is not null)
{
return hostLoggerFactory?.CreateLogger(LogCategories.Startup);
}

// An error occurred before the host was built; a minimal logger factory is being created to send telemetry to AppInsights/Otel.
var otelLoggerFactory = BuildOtelLoggerFactory();

// If the Otel logger factory is null, use the fallback logger instead. These logs will not be accessible in AppInsights/Otel.
if (otelLoggerFactory is null)
{
return _logger;
}
return new CompositeLogger(_logger, otelLoggerFactory.CreateLogger(LogCategories.Startup));
}

private void LogInitialization(IHost host, bool isOffline, int attemptCount, int startCount, Guid operationId)
Expand Down Expand Up @@ -1047,6 +1067,87 @@ private void EndStartupOperation(ScriptHostStartupOperation operation)
_logger.StartupOperationCompleted(operation.Id);
}

private async Task LogErrorWithTransientOtelLoggerAsync(string log)
{
var loggerFactory = BuildOtelLoggerFactory();

if (loggerFactory is not null)
{
var logger = loggerFactory.CreateLogger(ScriptConstants.LogCategoryHostGeneral);
logger.LogError(log);

// Delay increases the chance that the log is sent to AppInsights/Otel before the logger factory is disposed
await Task.Delay(2000);
// Do a force flush as the host is shutting down and we want to ensure the logs are sent before disposing the logger factory
ForceFlush(loggerFactory);
// Give some time for the logger to flush
await Task.Delay(4000);
}
}

private ILoggerFactory BuildOtelLoggerFactory()
{
var appInsightsConnStr = GetConfigurationValue(AppInsightsConnectionString, _config);
var otlpEndpoint = GetConfigurationValue(OtlpEndpoint, _config);
if (appInsightsConnStr is not { Length: > 0 } && otlpEndpoint is not { Length: > 0 })
{
return null; // Nothing configured
}

// Create a minimal logger factory with OpenTelemetry and Azure Monitor exporter
return LoggerFactory.Create(builder =>
{
builder.AddOpenTelemetry(logging =>
{
logging.IncludeScopes = true;
logging.IncludeFormattedMessage = true;

if (appInsightsConnStr is { Length: > 0 })
{
logging.AddAzureMonitorLogExporter(options =>
{
options.ConnectionString = appInsightsConnStr;

var appInsightsAuthStr = GetConfigurationValue(AppInsightsAuthenticationString, _config);
if (appInsightsAuthStr is { Length: > 0 })
{
var credOptions = AppInsightsCredentialOptions.ParseAuthenticationString(appInsightsAuthStr);
options.Credential = new ManagedIdentityCredential(credOptions.ClientId);
}
});
}

if (otlpEndpoint is { Length: > 0 })
{
logging.AddOtlpExporter();
}
});
});
}

private void ForceFlush(ILoggerFactory loggerFactory)
{
var serviceProvider = (IServiceProvider)loggerFactory.GetType()
.GetField("_serviceProvider", BindingFlags.NonPublic | BindingFlags.Instance)
.GetValue(loggerFactory);

// Get all logger providers from the service provider
var providers = serviceProvider?.GetServices<ILoggerProvider>() ?? Enumerable.Empty<ILoggerProvider>();

foreach (var provider in providers)
{
if (provider is OpenTelemetryLoggerProvider otelProvider)
{
otelProvider.Dispose();
}
}
}

private static string GetConfigurationValue(string key, IConfiguration configuration = null)
{
return configuration?[key] ?? Environment.GetEnvironmentVariable(key);
}

protected virtual void Dispose(bool disposing)
{
if (!_disposed)
Expand Down
43 changes: 3 additions & 40 deletions src/WebJobs.Script/ScriptHostBuilderExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,7 @@
using System.ComponentModel;
using System.Diagnostics.Tracing;
using System.Runtime.InteropServices;
using System.Threading;
using Azure.Identity;
using Microsoft.ApplicationInsights;
using Microsoft.ApplicationInsights.Channel;
using Microsoft.ApplicationInsights.DataContracts;
using Microsoft.ApplicationInsights.Extensibility;
using Microsoft.ApplicationInsights.Extensibility.Implementation;
using Microsoft.ApplicationInsights.WindowsServer.TelemetryChannel;
using Microsoft.Azure.WebJobs.Host;
using Microsoft.Azure.WebJobs.Host.Executors;
Expand Down Expand Up @@ -251,9 +245,7 @@ public static IHostBuilder AddScriptHostCore(this IHostBuilder builder, ScriptAp
}
catch (Exception ex)
{
string appInsightsConnStr = GetConfigurationValue(EnvironmentSettingNames.AppInsightsConnectionString, context.Configuration);
string appInsightsAuthStr = GetConfigurationValue(EnvironmentSettingNames.AppInsightsAuthenticationString, context.Configuration);
RecordAndThrowExternalStartupException("Error configuring services in an external startup class.", ex, loggerFactory, appInsightsConnStr, appInsightsAuthStr);
RecordAndThrowExternalStartupException("Error configuring services in an external startup class.", ex, loggerFactory);
}
}

Expand All @@ -279,10 +271,7 @@ public static IHostBuilder AddScriptHostCore(this IHostBuilder builder, ScriptAp
}
catch (Exception ex)
{
// Go directly to the environment; We have no valid configuration from the customer at this point.
string appInsightsConnStr = GetConfigurationValue(EnvironmentSettingNames.AppInsightsConnectionString);
string appInsightsAuthStr = GetConfigurationValue(EnvironmentSettingNames.AppInsightsAuthenticationString);
RecordAndThrowExternalStartupException("Error building configuration in an external startup class.", ex, loggerFactory, appInsightsConnStr, appInsightsAuthStr);
RecordAndThrowExternalStartupException("Error building configuration in an external startup class.", ex, loggerFactory);
}
});
}
Expand Down Expand Up @@ -578,38 +567,12 @@ private static T GetAndRemove<T>(this IDictionary<object, object> dictionary, st
}
}

private static void RecordAndThrowExternalStartupException(string message, Exception ex, ILoggerFactory loggerFactory, string appInsightsConnStr, string appInsightsAuthString)
private static void RecordAndThrowExternalStartupException(string message, Exception ex, ILoggerFactory loggerFactory)
{
var startupEx = new ExternalStartupException(message, ex);

var logger = loggerFactory.CreateLogger(LogCategories.Startup);
logger.LogDiagnosticEventError(DiagnosticEventConstants.ExternalStartupErrorCode, message, DiagnosticEventConstants.ExternalStartupErrorHelpLink, startupEx);

// Send the error to App Insights if possible. This is happening during ScriptHost construction so we
// have no existing TelemetryClient to use. Create a one-off client and flush it ASAP.
if (!string.IsNullOrEmpty(appInsightsConnStr))
{
using TelemetryConfiguration telemetryConfiguration = new()
{
ConnectionString = appInsightsConnStr,
TelemetryChannel = new InMemoryChannel()
};

if (!string.IsNullOrEmpty(appInsightsAuthString))
{
TokenCredentialOptions credentialOptions = TokenCredentialOptions.ParseAuthenticationString(appInsightsAuthString);
// Default is connection string based ingestion
telemetryConfiguration.SetAzureTokenCredential(new ManagedIdentityCredential(credentialOptions.ClientId));
}

TelemetryClient telemetryClient = new(telemetryConfiguration);
telemetryClient.Context.GetInternalContext().SdkVersion = new ApplicationInsightsSdkVersionProvider().GetSdkVersion();
telemetryClient.TrackTrace(startupEx.ToString(), SeverityLevel.Error);
telemetryClient.TrackException(startupEx);
telemetryClient.Flush();
// Flush is async, sleep for 1 sec to give it time to flush
Thread.Sleep(1000);
}
throw startupEx;
}

Expand Down