Skip to content

Commit

Permalink
[Preview] AI Integration: Adds latency based filters to add request d…
Browse files Browse the repository at this point in the history
…iagnostics in Activity and some minor changes (#3362)

* add open telemetry config

* add documentation

* add documentation

* add test

* fixed imports

* rebased contract changes

* rename config to options

* changes same as java

* add new contract

* add exception and tests

* remove enable telemetry API and add 2 different with threshold for CRUD and query

* update contract

* check for scope enabled or not before recording any attribute

* add null check

* null check for non supported operations

* other minor changes

* fix tests

* dispose listener

* remove console log

* enable this feature by default for GA package open telemetry don't even exist

* open telemetry config

* test fix

* test fix

* add request options

* add null check

* updated contract

* rename IsDistributedTracingEnabled variable and add more verbose

* rename apis

* update contract

* update request options and contract

* new generated xml

* added Distributed Tracing option with Latency Threshold and code refactor to load client properties during initialization of recorder

* fix test

* update contract

* cosmetic changes

* preview contract changes

* wip

* update contract

* minor cosmetic changes

* remove publicly exposed APIs

* fix tests

Co-authored-by: Sourabh Jain <sourabhjain@microsoft.com>
  • Loading branch information
sourabh1007 and sourabh1007 authored Aug 26, 2022
1 parent 3eeb80a commit 875109d
Show file tree
Hide file tree
Showing 39 changed files with 1,205 additions and 449 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,6 @@ public class BenchmarkConfig
[Option(Required = false, HelpText = "Disable core SDK logging")]
public bool DisableCoreSdkLogging { get; set; }

[Option(Required = false, HelpText = "Enable Open Telemetry")]
public bool EnableOpenTelemetry { get; set; }

[Option(Required = false, HelpText = "Enable Client Telemetry")]
public bool EnableTelemetry { get; set; }

Expand Down Expand Up @@ -196,11 +193,6 @@ internal Microsoft.Azure.Cosmos.CosmosClient CreateCosmosClient(string accountKe
MaxTcpConnectionsPerEndpoint = this.MaxTcpConnectionsPerEndpoint
};

if (this.EnableOpenTelemetry)
{
clientOptions.EnableOpenTelemetry = true;
}

if (this.EnableTelemetry)
{
Environment.SetEnvironmentVariable(
Expand Down
17 changes: 10 additions & 7 deletions Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ namespace Microsoft.Azure.Cosmos
using Microsoft.Azure.Documents;
using Microsoft.Azure.Documents.Client;
using Newtonsoft.Json;
using Telemetry;

/// <summary>
/// Defines all the configurable options that the CosmosClient requires.
Expand Down Expand Up @@ -925,13 +926,15 @@ public override bool CanConvert(Type objectType)
}

/// <summary>
/// Enable OpenTelemetry and start emiting activities for each operations
/// Distributed Tracing Options. <see cref="Microsoft.Azure.Cosmos.DistributedTracingOptions"/>
/// </summary>
#if PREVIEW
public
#else
internal
#endif
bool EnableOpenTelemetry { get; set; }
internal DistributedTracingOptions DistributedTracingOptions { get; set; }

/// <summary>
/// Gets or sets value indicating whether distributed tracing activities (<see cref="System.Diagnostics.Activity"/>) are going to be created for the SDK methods calls and HTTP calls.
/// By default true for Preview package
/// </summary>
internal bool EnableDistributedTracing { get; set; }

}
}
20 changes: 9 additions & 11 deletions Microsoft.Azure.Cosmos/src/Fluent/CosmosClientBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ namespace Microsoft.Azure.Cosmos.Fluent
using System.Net.Http;
using System.Threading;
using System.Threading.Tasks;
using Azure;
using global::Azure;
using global::Azure.Core;
using Microsoft.Azure.Cosmos.Core.Trace;
using Microsoft.Azure.Documents;
using Microsoft.Azure.Documents.Client;
using Telemetry;

/// <summary>
/// This is a Builder class that creates a cosmos client
Expand Down Expand Up @@ -409,7 +409,7 @@ public CosmosClientBuilder WithConnectionModeDirect(TimeSpan? idleTcpConnectionT

return this;
}

/// <summary>
/// This can be used to weaken the database account consistency level for read operations.
/// If this is not set the database account consistency level will be used for all requests.
Expand All @@ -420,20 +420,18 @@ public CosmosClientBuilder WithConsistencyLevel(Cosmos.ConsistencyLevel consiste
{
this.clientOptions.ConsistencyLevel = consistencyLevel;
return this;

}

/// <summary>
/// Enable OpenTelemetry and start emiting activities for each operations
/// If Open Telemetry listener is subscribed for Azure.Cosmos namespace, There are <see cref="Microsoft.Azure.Cosmos.DistributedTracingOptions"/> you can leverage to control it.<br></br>
/// </summary>
/// <param name="options">Tracing Options <see cref="Microsoft.Azure.Cosmos.DistributedTracingOptions"/></param>
/// <returns>The current <see cref="CosmosClientBuilder"/>.</returns>
#if PREVIEW
public
#else
internal
#endif
CosmosClientBuilder EnableOpenTelemetry()
{
this.clientOptions.EnableOpenTelemetry = true;
internal CosmosClientBuilder WithDistributingTracing(DistributedTracingOptions options)
{
this.clientOptions.DistributedTracingOptions = options;

return this;
}

Expand Down
6 changes: 6 additions & 0 deletions Microsoft.Azure.Cosmos/src/RequestOptions/RequestOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ namespace Microsoft.Azure.Cosmos
using System;
using System.Collections.Generic;
using Microsoft.Azure.Documents;
using Telemetry;

/// <summary>
/// The default cosmos request options
Expand Down Expand Up @@ -41,6 +42,11 @@ public class RequestOptions
/// </summary>
public Action<Headers> AddRequestHeaders { get; set; }

/// <summary>
/// Set Request Level Distributed Tracing Options.
/// </summary>
internal DistributedTracingOptions DistributedTracingOptions { get; set; }

/// <summary>
/// Gets or sets the boolean to use effective partition key routing in the cosmos db request.
/// </summary>
Expand Down
20 changes: 11 additions & 9 deletions Microsoft.Azure.Cosmos/src/Resource/ClientContextCore.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ namespace Microsoft.Azure.Cosmos
using Microsoft.Azure.Cosmos.Resource.CosmosExceptions;
using Microsoft.Azure.Cosmos.Routing;
using Microsoft.Azure.Cosmos.Telemetry;
using Microsoft.Azure.Cosmos.Telemetry.Diagnostics;
using Microsoft.Azure.Cosmos.Telemetry.OpenTelemetry;
using Microsoft.Azure.Cosmos.Tracing;
using Microsoft.Azure.Documents;

Expand Down Expand Up @@ -266,7 +264,8 @@ private async Task<TResult> OperationHelperWithRootTraceAsync<TResult>(
trace,
task,
openTelemetry,
operationName);
operationName,
requestOptions);
}
}

Expand Down Expand Up @@ -295,7 +294,8 @@ private Task<TResult> OperationHelperWithRootTraceWithSynchronizationContextAsyn
trace,
task,
openTelemetry,
operationName);
operationName,
requestOptions);
}
});
}
Expand Down Expand Up @@ -471,22 +471,24 @@ private async Task<TResult> RunWithDiagnosticsHelperAsync<TResult>(
ITrace trace,
Func<ITrace, Task<TResult>> task,
Func<TResult, OpenTelemetryAttributes> openTelemetry,
string operationName)
string operationName,
RequestOptions requestOptions)
{
using (OpenTelemetryCoreRecorder recorder =
OpenTelemetryRecorderFactory.CreateRecorder(
operationName: operationName,
isFeatureEnabled: this.clientOptions.EnableOpenTelemetry))
requestOptions: requestOptions,
clientContext: this.isDisposed ? null : this))
using (new ActivityScope(Guid.NewGuid()))
{
try
{
// Record Operation Name
recorder.Record(OpenTelemetryAttributeKeys.DbOperation, operationName);

TResult result = await task(trace).ConfigureAwait(false);
if (openTelemetry != null && recorder.IsEnabled)
{
// Record client and other information
recorder.Record(operationName, this);

// Record request response information
OpenTelemetryAttributes response = openTelemetry(result);
recorder.Record(response);
Expand Down
2 changes: 1 addition & 1 deletion Microsoft.Azure.Cosmos/src/Resource/CosmosClientContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ namespace Microsoft.Azure.Cosmos
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Azure.Cosmos.Handlers;
using Microsoft.Azure.Cosmos.Telemetry.OpenTelemetry;
using Microsoft.Azure.Cosmos.Tracing;
using Microsoft.Azure.Documents;
using Telemetry;

/// <summary>
/// This class is used to get access to different client level operations without directly referencing the client object.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public override Task<ContainerResponse> CreateContainerAsync(
nameof(CreateContainerAsync),
requestOptions,
(trace) => base.CreateContainerAsync(containerProperties, throughput, requestOptions, trace, cancellationToken),
(response) => new OpenTelemetryResponse<ContainerProperties>(response));
(response) => new OpenTelemetryResponse<ContainerProperties>(response, this.Id));
}

public override Task<ContainerResponse> CreateContainerAsync(string id,
Expand All @@ -43,7 +43,7 @@ public override Task<ContainerResponse> CreateContainerAsync(string id,
nameof(CreateContainerAsync),
requestOptions,
(trace) => base.CreateContainerAsync(id, partitionKeyPath, throughput, requestOptions, trace, cancellationToken),
(response) => new OpenTelemetryResponse<ContainerProperties>(response));
(response) => new OpenTelemetryResponse<ContainerProperties>(response, this.Id));
}

public override Task<ContainerResponse> CreateContainerIfNotExistsAsync(
Expand All @@ -56,7 +56,7 @@ public override Task<ContainerResponse> CreateContainerIfNotExistsAsync(
nameof(CreateContainerIfNotExistsAsync),
requestOptions,
(trace) => base.CreateContainerIfNotExistsAsync(containerProperties, throughput, requestOptions, trace, cancellationToken),
(response) => new OpenTelemetryResponse<ContainerProperties>(response));
(response) => new OpenTelemetryResponse<ContainerProperties>(response, this.Id));
}

public override Task<ContainerResponse> CreateContainerIfNotExistsAsync(
Expand All @@ -70,7 +70,7 @@ public override Task<ContainerResponse> CreateContainerIfNotExistsAsync(
nameof(CreateContainerIfNotExistsAsync),
requestOptions,
(trace) => base.CreateContainerIfNotExistsAsync(id, partitionKeyPath, throughput, requestOptions, trace, cancellationToken),
(response) => new OpenTelemetryResponse<ContainerProperties>(response));
(response) => new OpenTelemetryResponse<ContainerProperties>(response, this.Id));
}

public override Task<ResponseMessage> CreateContainerStreamAsync(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// ------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// ------------------------------------------------------------

namespace Microsoft.Azure.Cosmos
{
using System;
using System.Runtime.CompilerServices;

/// <summary>
/// Distributed tracing (Open Telemetry) configuration.
/// Currently internal; it needs to be made public once AppInsight is ready.
/// </summary>
internal sealed class DistributedTracingOptions
{
/// <summary>
/// Default latency threshold (100 ms) for operations other than query.
/// </summary>
internal static readonly TimeSpan DefaultCrudLatencyThreshold = TimeSpan.FromMilliseconds(100);

/// <summary>
/// Default latency threshold (500 ms) for query operations.
/// NOTE(review): despite "Timeout" in the name, this value appears to be used as a latency threshold
/// (the counterpart of <see cref="DefaultCrudLatencyThreshold"/>), not as a timeout - confirm and consider renaming.
/// </summary>
internal static readonly TimeSpan DefaultQueryTimeoutThreshold = TimeSpan.FromMilliseconds(500);

/// <summary>
/// Latency threshold used to decide when an event (<see cref="System.Diagnostics.Tracing.EventSource"/>)
/// carrying the request diagnostics is generated for distributed tracing.
/// When it is not set (<c>null</c>), the defaults apply: events are generated for query operations
/// taking more than 500 ms and for non-query operations taking more than 100 ms.
/// </summary>
public TimeSpan? DiagnosticsLatencyThreshold { get; set; }

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,32 @@
namespace Microsoft.Azure.Cosmos.Telemetry.Diagnostics
{
using System;
using System.Net;
using Documents;

internal static class DiagnosticsFilterHelper
{
private static readonly TimeSpan latencyThresholdInMs = TimeSpan.FromMilliseconds(250);

/// <summary>
/// Allow only when either of below is <b>True</b><br></br>
/// 1) Latency is not more than 100 ms<br></br>
/// 1) Latency is more than the configured threshold (by default 100 ms, or 500 ms for query)<br></br>
/// 2) HTTP status code is not Success<br></br>
/// </summary>
/// <returns>true or false</returns>
public static bool IsAllowed(
TimeSpan latency,
HttpStatusCode statuscode)
public static bool IsTracingNeeded(
DistributedTracingOptions config,
OpenTelemetryAttributes response)
{
return latency > DiagnosticsFilterHelper.latencyThresholdInMs || !statuscode.IsSuccess();
TimeSpan latencyThreshold;

if (config?.DiagnosticsLatencyThreshold != null)
{
latencyThreshold = config.DiagnosticsLatencyThreshold.Value;
}
else
{
latencyThreshold = response.OperationType == OperationType.Query ? DistributedTracingOptions.DefaultQueryTimeoutThreshold : DistributedTracingOptions.DefaultCrudLatencyThreshold;
}

return response.Diagnostics.GetClientElapsedTime() > latencyThreshold || !response.StatusCode.IsSuccess();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
//------------------------------------------------------------

namespace Microsoft.Azure.Cosmos.Telemetry.Diagnostics
namespace Microsoft.Azure.Cosmos.Telemetry
{
internal sealed class OpenTelemetryAttributeKeys
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,29 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// ------------------------------------------------------------

namespace Microsoft.Azure.Cosmos.Telemetry.OpenTelemetry
namespace Microsoft.Azure.Cosmos.Telemetry
{
using System.Net;
using Microsoft.Azure.Documents;

internal class OpenTelemetryAttributes
{
internal const string NotAvailable = "NA";

internal const string NotAvailable = "information not available";

/// <summary>
/// For testing purpose only, to make initialization of this class easy
/// </summary>
internal OpenTelemetryAttributes()
{
}

internal OpenTelemetryAttributes(RequestMessage requestMessage)
{
this.RequestContentLength = requestMessage?.Headers?.ContentLength ?? OpenTelemetryAttributes.NotAvailable;
this.ContainerName = requestMessage?.ContainerId ?? OpenTelemetryAttributes.NotAvailable;
this.DatabaseName = requestMessage?.DatabaseId ?? OpenTelemetryAttributes.NotAvailable;

this.OperationType = requestMessage?.OperationType ?? OperationType.Invalid;
}

/// <summary>
Expand Down Expand Up @@ -56,5 +66,10 @@ internal OpenTelemetryAttributes(RequestMessage requestMessage)
/// ItemCount
/// </summary>
internal CosmosDiagnostics Diagnostics { get; set; }

/// <summary>
/// OperationType
/// </summary>
internal OperationType OperationType { get; set; }
}
}
Loading

0 comments on commit 875109d

Please sign in to comment.