Skip to content
This repository was archived by the owner on Nov 1, 2023. It is now read-only.

Commit d84b72b

Browse files
nharper285 and chkeita authored
Initial Custom Metrics - Node & Task Heartbeat (#3082)
* Refactor logging to use and new function queue * Testing setup of custom metric. * Changing host.json * Updating log interface. * changes. * Fix encoding. * Updating. * Updating tests. * Adding metrics to program * Pushing latest changes. * Update interface references. * Removing string type. * Add string back. * Getting additional data for task heartbeat. * Removing additional fields. * Removing containers. * Cleaning up. * Adding feature flag. * Adding bicep changes. * Fixing tests. * Fixing test metrics. * Removing most of tests. * Telemetry Refact Round 2. * Updated metrics. * Remove custom metric function. * Syncing events.cs * Making optional. * Using events as metric dimensions. * Fixing ORM tests. * Remove metric records. * Removing bad test. * Remove testmetrics.' * Adding test back. * Improving custom dimensions serialization. * Update src/ApiService/ApiService/onefuzzlib/Metrics.cs Co-authored-by: Cheick Keita <kcheick@gmail.com> * Reverting change. --------- Co-authored-by: Cheick Keita <kcheick@gmail.com>
1 parent 64782d7 commit d84b72b

File tree

16 files changed

+156
-18
lines changed

16 files changed

+156
-18
lines changed

src/ApiService/ApiService/FeatureFlags.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@ public static class FeatureFlagConstants {
44
public const string RenderOnlyScribanTemplates = "RenderOnlyScribanTemplates";
55
public const string EnableNodeDecommissionStrategy = "EnableNodeDecommissionStrategy";
66
public const string SemanticNotificationConfigValidation = "SemanticNotificationConfigValidation";
7+
public const string EnableCustomMetricTelemetry = "EnableCustomMetricTelemetry";
78
}

src/ApiService/ApiService/Functions/QueueNodeHeartbeat.cs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@ public QueueNodeHearbeat(ILogTracer log, IOnefuzzContext context) {
1717

1818
[Function("QueueNodeHeartbeat")]
1919
public async Async.Task Run([QueueTrigger("node-heartbeat", Connection = "AzureWebJobsStorage")] string msg) {
20-
_log.Info($"heartbeat: {msg}");
20+
2121
var nodes = _context.NodeOperations;
2222
var events = _context.Events;
23+
var metrics = _context.Metrics;
2324

25+
_log.Info($"heartbeat: {msg}");
2426
var hb = JsonSerializer.Deserialize<NodeHeartbeatEntry>(msg, EntityConverter.GetJsonSerializerOptions()).EnsureNotNull($"wrong data {msg}");
2527
var node = await nodes.GetByMachineId(hb.NodeId);
2628

@@ -35,7 +37,12 @@ public async Async.Task Run([QueueTrigger("node-heartbeat", Connection = "AzureW
3537
_log.WithHttpStatus(r.ErrorV).Error($"Failed to replace heartbeat: {hb.NodeId:Tag:NodeId}");
3638
}
3739

40+
var nodeHeartbeatEvent = new EventNodeHeartbeat(node.MachineId, node.ScalesetId, node.PoolName, node.State);
3841
// TODO: do we still send the event if we fail to update the table?
39-
await events.SendEvent(new EventNodeHeartbeat(node.MachineId, node.ScalesetId, node.PoolName));
42+
await events.SendEvent(nodeHeartbeatEvent);
43+
if (await _context.FeatureManagerSnapshot.IsEnabledAsync(FeatureFlagConstants.EnableCustomMetricTelemetry)) {
44+
metrics.SendMetric(1, nodeHeartbeatEvent);
45+
}
46+
4047
}
4148
}

src/ApiService/ApiService/Functions/QueueTaskHeartbeat.cs

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,34 +7,45 @@ namespace Microsoft.OneFuzz.Service.Functions;
77

88
public class QueueTaskHearbeat {
99
private readonly ILogTracer _log;
10+
private readonly IOnefuzzContext _context;
1011

11-
private readonly IEvents _events;
12-
private readonly ITaskOperations _tasks;
13-
14-
public QueueTaskHearbeat(ILogTracer logTracer, ITaskOperations tasks, IEvents events) {
12+
public QueueTaskHearbeat(ILogTracer logTracer, IOnefuzzContext context) {
1513
_log = logTracer;
16-
_tasks = tasks;
17-
_events = events;
14+
_context = context;
1815
}
1916

2017
[Function("QueueTaskHeartbeat")]
2118
public async Async.Task Run([QueueTrigger("task-heartbeat", Connection = "AzureWebJobsStorage")] string msg) {
22-
_log.Info($"heartbeat: {msg}");
2319

20+
var _tasks = _context.TaskOperations;
21+
var _jobs = _context.JobOperations;
22+
var _events = _context.Events;
23+
var _metrics = _context.Metrics;
24+
25+
_log.Info($"heartbeat: {msg}");
2426
var hb = JsonSerializer.Deserialize<TaskHeartbeatEntry>(msg, EntityConverter.GetJsonSerializerOptions()).EnsureNotNull($"wrong data {msg}");
2527

2628
var task = await _tasks.GetByTaskId(hb.TaskId);
27-
2829
if (task == null) {
2930
_log.Warning($"invalid {hb.TaskId:Tag:TaskId}");
3031
return;
3132
}
3233

34+
var job = await _jobs.Get(task.JobId);
35+
if (job == null) {
36+
_log.Warning($"invalid {task.JobId:Tag:JobId}");
37+
return;
38+
}
3339
var newTask = task with { Heartbeat = DateTimeOffset.UtcNow };
3440
var r = await _tasks.Replace(newTask);
3541
if (!r.IsOk) {
3642
_log.WithHttpStatus(r.ErrorV).Error($"failed to replace with new task {hb.TaskId:Tag:TaskId}");
3743
}
38-
await _events.SendEvent(new EventTaskHeartbeat(newTask.JobId, newTask.TaskId, newTask.Config));
44+
45+
var taskHeartBeatEvent = new EventTaskHeartbeat(newTask.JobId, newTask.TaskId, job.Config.Project, job.Config.Name, newTask.State, newTask.Config);
46+
await _events.SendEvent(taskHeartBeatEvent);
47+
if (await _context.FeatureManagerSnapshot.IsEnabledAsync(FeatureFlagConstants.EnableCustomMetricTelemetry)) {
48+
_metrics.SendMetric(1, taskHeartBeatEvent);
49+
}
3950
}
4051
}

src/ApiService/ApiService/Log.cs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ public void AppendFormatted<T>(T message, string? format) {
5757
public interface ILog {
5858
void Log(Guid correlationId, LogStringHandler message, SeverityLevel level, IReadOnlyDictionary<string, string> tags, string? caller);
5959
void LogEvent(Guid correlationId, LogStringHandler evt, IReadOnlyDictionary<string, string> tags, IReadOnlyDictionary<string, double>? metrics, string? caller);
60+
void LogMetric(Guid correlationId, LogStringHandler metric, int value, IReadOnlyDictionary<string, string>? customDimensions, IReadOnlyDictionary<string, string> tags, string? caller);
61+
6062
void LogException(Guid correlationId, Exception ex, LogStringHandler message, IReadOnlyDictionary<string, string> tags, IReadOnlyDictionary<string, double>? metrics, string? caller);
6163
void Flush();
6264
}
@@ -103,6 +105,17 @@ public void LogEvent(Guid correlationId, LogStringHandler evt, IReadOnlyDictiona
103105
_telemetryClient.TrackEvent(telemetry);
104106
}
105107

108+
public void LogMetric(Guid correlationId, LogStringHandler metric, int value, IReadOnlyDictionary<string, string>? customDimensions, IReadOnlyDictionary<string, string> tags, string? caller) {
109+
var telemetry = new MetricTelemetry(metric.ToString(), value, value, value, value, value);
110+
// copy properties
111+
Copy(telemetry.Properties, customDimensions);
112+
telemetry.Properties["CorrelationId"] = correlationId.ToString();
113+
if (caller is not null) telemetry.Properties["CalledBy"] = caller;
114+
Copy(telemetry.Properties, metric.Tags);
115+
116+
_telemetryClient.TrackMetric(telemetry);
117+
}
118+
106119
public void LogException(Guid correlationId, Exception ex, LogStringHandler message, IReadOnlyDictionary<string, string> tags, IReadOnlyDictionary<string, double>? metrics, string? caller) {
107120
{
108121
var telemetry = new ExceptionTelemetry(ex);
@@ -160,11 +173,17 @@ public void Log(Guid correlationId, LogStringHandler message, SeverityLevel leve
160173
}
161174
}
162175

176+
public void LogMetric(Guid correlationId, LogStringHandler metric, int value, IReadOnlyDictionary<string, string>? customDimensions, IReadOnlyDictionary<string, string> tags, string? caller) {
177+
System.Console.Out.WriteLine($"[{correlationId}][Metric] {metric}");
178+
LogTags(correlationId, tags);
179+
}
180+
163181
public void LogEvent(Guid correlationId, LogStringHandler evt, IReadOnlyDictionary<string, string> tags, IReadOnlyDictionary<string, double>? metrics, string? caller) {
164182
System.Console.Out.WriteLine($"[{correlationId}][Event] {evt}");
165183
LogTags(correlationId, tags);
166184
LogMetrics(correlationId, metrics);
167185
}
186+
168187
public void LogException(Guid correlationId, Exception ex, LogStringHandler message, IReadOnlyDictionary<string, string> tags, IReadOnlyDictionary<string, double>? metrics, string? caller) {
169188
System.Console.Out.WriteLine($"[{correlationId}][Exception] {message}:{ex}");
170189
LogTags(correlationId, tags);
@@ -183,6 +202,7 @@ public interface ILogTracer {
183202

184203
void Error(Error error);
185204
void Event(LogStringHandler evt, IReadOnlyDictionary<string, double>? metrics = null);
205+
void Metric(LogStringHandler metric, int value, IReadOnlyDictionary<string, string>? customDimensions);
186206
void Exception(Exception ex, LogStringHandler message = $"", IReadOnlyDictionary<string, double>? metrics = null);
187207
void ForceFlush();
188208
void Info(LogStringHandler message);
@@ -327,6 +347,13 @@ public void Event(LogStringHandler evt, IReadOnlyDictionary<string, double>? met
327347
}
328348
}
329349

350+
public void Metric(LogStringHandler metric, int value, IReadOnlyDictionary<string, string>? customDimensions) {
351+
var caller = GetCaller();
352+
foreach (var logger in _loggers) {
353+
logger.LogMetric(CorrelationId, metric, value, customDimensions, Tags, caller);
354+
}
355+
}
356+
330357
public void Exception(Exception ex, LogStringHandler message, IReadOnlyDictionary<string, double>? metrics) {
331358
var caller = GetCaller();
332359
foreach (var logger in _loggers) {

src/ApiService/ApiService/OneFuzzTypes/Events.cs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -167,12 +167,14 @@ public record EventTaskStateUpdated(
167167
TaskConfig Config
168168
) : BaseEvent();
169169

170-
171170
[EventType(EventType.TaskHeartbeat)]
172171
public record EventTaskHeartbeat(
173172
Guid JobId,
174173
Guid TaskId,
175-
TaskConfig Config
174+
string? Project,
175+
string? Name,
176+
TaskState? State,
177+
TaskConfig? Config
176178
) : BaseEvent();
177179

178180
[EventType(EventType.Ping)]
@@ -273,7 +275,8 @@ PoolName PoolName
273275
public record EventNodeHeartbeat(
274276
Guid MachineId,
275277
Guid? ScalesetId,
276-
PoolName PoolName
278+
PoolName PoolName,
279+
NodeState state
277280
) : BaseEvent();
278281

279282

src/ApiService/ApiService/Program.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ public static async Async.Task Main() {
8585
})
8686
.AddScoped<IAutoScaleOperations, AutoScaleOperations>()
8787
.AddScoped<INodeOperations, NodeOperations>()
88+
.AddScoped<IMetrics, Metrics>()
8889
.AddScoped<IEvents, Events>()
8990
.AddScoped<IWebhookOperations, WebhookOperations>()
9091
.AddScoped<IWebhookMessageLogOperations, WebhookMessageLogOperations>()

src/ApiService/ApiService/host.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
"logging": {
44
"applicationInsights": {
55
"samplingSettings": {
6-
"isEnabled": true,
7-
"excludedTypes": "Request"
6+
"isEnabled": false,
7+
"excludedTypes": "Request;Trace;Dependency;Event;Exception"
88
}
99
}
1010
}

src/ApiService/ApiService/onefuzzlib/Events.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ public SignalREvent Truncate(int maxLength) {
2121

2222
public interface IEvents {
2323
Async.Task SendEvent(BaseEvent anEvent);
24-
2524
Async.Task QueueSignalrEvent(DownloadableEventMessage message);
2625

2726
void LogEvent(BaseEvent anEvent);
src/ApiService/ApiService/onefuzzlib/Metrics.cs

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
using System.Text.Json;
2+
using System.Text.Json.Serialization;
3+
using Microsoft.OneFuzz.Service.OneFuzzLib.Orm;
4+
5+
namespace Microsoft.OneFuzz.Service {
6+
7+
public record CustomMetric(
8+
string name,
9+
int value,
10+
Dictionary<string, string> customDimensions
11+
);
12+
13+
14+
public interface IMetrics {
15+
void SendMetric(int metricValue, BaseEvent customDimensions);
16+
17+
void LogMetric(BaseEvent metric);
18+
}
19+
20+
public class Metrics : IMetrics {
21+
private readonly ILogTracer _log;
22+
private readonly IOnefuzzContext _context;
23+
private readonly JsonSerializerOptions _options;
24+
25+
public Metrics(ILogTracer log, IOnefuzzContext context) {
26+
_context = context;
27+
_log = log;
28+
_options = new JsonSerializerOptions(EntityConverter.GetJsonSerializerOptions()) {
29+
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
30+
};
31+
_options.Converters.Add(new RemoveUserInfo());
32+
}
33+
34+
public void SendMetric(int metricValue, BaseEvent customDimensions) {
35+
var metricType = customDimensions.GetEventType();
36+
37+
_ = _options.PropertyNamingPolicy ?? throw new ArgumentException("Serializer _options not available.");
38+
39+
var metricTypeSnakeCase = _options.PropertyNamingPolicy.ConvertName($"{metricType}");
40+
41+
var dimensionNode = JsonSerializer.SerializeToNode(customDimensions, customDimensions.GetType(), _options);
42+
_ = dimensionNode ?? throw new JsonException("Was not able to properly serialize the custom dimensions.");
43+
var dimensionDict = dimensionNode.AsObject().ToDictionary(kvp => kvp.Key.ToString(), kvp => kvp.Value is not null ? kvp.Value.ToString() : "");
44+
45+
_log.Metric($"{metricTypeSnakeCase}", metricValue, dimensionDict);
46+
LogMetric(customDimensions);
47+
}
48+
49+
public void LogMetric(BaseEvent metric) {
50+
var serializedMetric = JsonSerializer.Serialize(metric, metric.GetType(), _options);
51+
_log.Info($"sending metric: {metric.GetEventType():Tag:MetricType} - {serializedMetric}");
52+
}
53+
}
54+
}

src/ApiService/ApiService/onefuzzlib/OnefuzzContext.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ public interface IOnefuzzContext {
1515
ICreds Creds { get; }
1616
IDiskOperations DiskOperations { get; }
1717
IEvents Events { get; }
18+
IMetrics Metrics { get; }
1819
IExtensions Extensions { get; }
1920
IIpOperations IpOperations { get; }
2021
IJobOperations JobOperations { get; }
@@ -62,6 +63,7 @@ public OnefuzzContext(IServiceProvider serviceProvider) {
6263
public IAutoScaleOperations AutoScaleOperations => _serviceProvider.GetRequiredService<IAutoScaleOperations>();
6364
public INodeOperations NodeOperations => _serviceProvider.GetRequiredService<INodeOperations>();
6465
public IEvents Events => _serviceProvider.GetRequiredService<IEvents>();
66+
public IMetrics Metrics => _serviceProvider.GetRequiredService<IMetrics>();
6567
public IWebhookOperations WebhookOperations => _serviceProvider.GetRequiredService<IWebhookOperations>();
6668
public IWebhookMessageLogOperations WebhookMessageLogOperations => _serviceProvider.GetRequiredService<IWebhookMessageLogOperations>();
6769
public ITaskOperations TaskOperations => _serviceProvider.GetRequiredService<ITaskOperations>();

0 commit comments

Comments
 (0)