Skip to content

Commit

Permalink
Longhaul: Add threshold for device not found (#4642)
Browse files Browse the repository at this point in the history
Longhaul is seeing DeviceNotFound exceptions. We didn't use to see this. Bear and I think that this is because either:
- EdgeHub is taking a long time to restart
- The exception's transient field was flipped from true to false by the sdk

The fix here is to implement a tolerance for these exceptions, similar to what we already do for status code 0.
  • Loading branch information
and-rewsmith authored Mar 23, 2021
1 parent 98c9168 commit bbebd3e
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 11 deletions.
7 changes: 6 additions & 1 deletion test/modules/DirectMethodSender/DirectMethodSenderBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ public async Task<Tuple<HttpStatusCode, ulong>> InvokeDirectMethodAsync(string m
}
catch (DeviceNotFoundException e)
{
logger.LogInformation(e, $"Transient exception caught with count {this.directMethodCount}");
logger.LogInformation(e, $"DeviceNotFound exception caught with count {this.directMethodCount}");
return new Tuple<HttpStatusCode, ulong>(HttpStatusCode.NotFound, this.directMethodCount);
}
catch (Exception e)
Expand All @@ -64,6 +64,11 @@ public async Task<Tuple<HttpStatusCode, ulong>> InvokeDirectMethodAsync(string m
}
}

// Retry is needed here because the test agents sometimes have transient
// network issues. Ideally, we would just send the report to the
// TestResultCoordinator (TRC) and have it decide whether it is a pass or fail.
// However, the transient exceptions don't have status codes, which
// complicates that analysis. This retry approach is easier.
async Task<int> InvokeDirectMethodWithRetryAsync(
ILogger logger,
string deviceId,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class DirectMethodLongHaulReportData
},
7,
true,
7, 0, 0
7, 0, 0, 0
},
new object[]
{
Expand All @@ -47,7 +47,7 @@ class DirectMethodLongHaulReportData
},
7,
false,
6, 0, 1
6, 0, 0, 1
},
new object[]
{
Expand All @@ -66,7 +66,26 @@ class DirectMethodLongHaulReportData
},
7,
false,
6, 1, 0
6, 1, 0, 0
},
new object[]
{
Enumerable.Range(1, 7).Select(v => (ulong)v),
Enumerable.Range(1, 7).Select(v => (ulong)v),
new List<HttpStatusCode> { HttpStatusCode.OK, HttpStatusCode.NotFound, HttpStatusCode.OK, HttpStatusCode.OK, HttpStatusCode.OK, HttpStatusCode.OK, HttpStatusCode.OK },
new DateTime[]
{
new DateTime(2020, 1, 1, 9, 10, 12, 10),
new DateTime(2020, 1, 1, 9, 10, 13, 10),
new DateTime(2020, 1, 1, 9, 10, 21, 10),
new DateTime(2020, 1, 1, 9, 10, 22, 10),
new DateTime(2020, 1, 1, 9, 10, 23, 10),
new DateTime(2020, 1, 1, 9, 10, 24, 10),
new DateTime(2020, 1, 1, 9, 10, 24, 15)
},
7,
false,
6, 0, 1, 0
},
};

Expand All @@ -75,7 +94,7 @@ class DirectMethodLongHaulReportData
{
Enumerable.Range(1, 7).Select(v => (ulong)v),
Enumerable.Range(1, 7).Select(v => (ulong)v),
new List<HttpStatusCode> { HttpStatusCode.OK, (HttpStatusCode)Enum.Parse(typeof(HttpStatusCode), "0"), HttpStatusCode.InternalServerError, HttpStatusCode.OK, HttpStatusCode.ServiceUnavailable, HttpStatusCode.InternalServerError, HttpStatusCode.OK },
new List<HttpStatusCode> { HttpStatusCode.OK, (HttpStatusCode)Enum.Parse(typeof(HttpStatusCode), "0"), HttpStatusCode.InternalServerError, HttpStatusCode.NotFound, HttpStatusCode.ServiceUnavailable, HttpStatusCode.InternalServerError, HttpStatusCode.OK },
new DateTime[]
{
new DateTime(2020, 1, 1, 9, 10, 12, 10),
Expand All @@ -88,7 +107,7 @@ class DirectMethodLongHaulReportData
},
7,
false,
3L, 1L,
2L, 1L, 1L,
new Dictionary<HttpStatusCode, long> { { HttpStatusCode.InternalServerError, 2 }, { HttpStatusCode.ServiceUnavailable, 1 } }
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ public async Task TestCreateReportAsync(
bool expectedIsPassed,
long expectedOk,
long expectedStatusCodeZero,
long expectedStatusCodeNotFound,
long expectedOther)
{
string senderSource = "senderSource";
Expand Down Expand Up @@ -213,11 +214,12 @@ public async Task TestCreateReportAsync(
Assert.Equal(expectedIsPassed, report.IsPassed);
Assert.Equal(expectedOk, report.SenderSuccesses);
Assert.Equal(expectedStatusCodeZero, report.StatusCodeZero);
Assert.Equal(expectedStatusCodeNotFound, report.DeviceNotFound);
Assert.Equal(expectedOther, report.Other.Sum(x => x.Value));
}

[Fact]
public async Task TestOtherStatusCodeOrder()
public async Task TestOtherStatusCodeCounts()
{
var x = DirectMethodLongHaulReportData.GetStatusCodeTestData;
IEnumerable<ulong> senderStoreValues = (IEnumerable<ulong>)x[0];
Expand All @@ -228,7 +230,8 @@ public async Task TestOtherStatusCodeOrder()
bool expectedIsPassed = (bool)x[5];
long expectedOk = (long)x[6];
long expectedStatusCodeZero = (long)x[7];
Dictionary<HttpStatusCode, long> expectedOtherDict = (Dictionary<HttpStatusCode, long>)x[8];
long expectedStatusCodeNotFound = (long)x[8];
Dictionary<HttpStatusCode, long> expectedOtherDict = (Dictionary<HttpStatusCode, long>)x[9];

string senderSource = "senderSource";
string receiverSource = "receiverSource";
Expand Down Expand Up @@ -268,6 +271,7 @@ public async Task TestOtherStatusCodeOrder()
Assert.Equal(expectedIsPassed, report.IsPassed);
Assert.Equal(expectedOk, report.SenderSuccesses);
Assert.Equal(expectedStatusCodeZero, report.StatusCodeZero);
Assert.Equal(expectedStatusCodeNotFound, report.DeviceNotFound);
Assert.Equal(expectedOtherDict.Sum(x => x.Value), report.Other.Sum(x => x.Value));
Assert.Equal(expectedOtherDict[HttpStatusCode.ServiceUnavailable], report.Other[HttpStatusCode.ServiceUnavailable]);
Assert.Equal(expectedOtherDict[HttpStatusCode.InternalServerError], report.Other[HttpStatusCode.InternalServerError]);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
// Copyright (c) Microsoft. All rights reserved.
namespace TestResultCoordinator.Reports.DirectMethod.LongHaul
{
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
Expand All @@ -18,6 +17,7 @@ public DirectMethodLongHaulReport(
long senderSuccesses,
long receiverSuccesses,
long statusCodeZero,
long deviceNotFound,
Dictionary<HttpStatusCode, long> other)
: base(testDescription, trackingId, resultType)
{
Expand All @@ -26,6 +26,7 @@ public DirectMethodLongHaulReport(
this.SenderSuccesses = senderSuccesses;
this.ReceiverSuccesses = receiverSuccesses;
this.StatusCodeZero = statusCodeZero;
this.DeviceNotFound = deviceNotFound;
this.Other = other;
}

Expand All @@ -34,6 +35,7 @@ public DirectMethodLongHaulReport(
public long SenderSuccesses { get; }
public long ReceiverSuccesses { get; }
public long StatusCodeZero { get; }
public long DeviceNotFound { get; }
public Dictionary<HttpStatusCode, long> Other { get; }

public override string Title => $"DirectMethod LongHaul Report for [{this.SenderSource}] and [{this.ReceiverSource}] ({this.ResultType})";
Expand All @@ -54,10 +56,11 @@ public bool IsPassedHelper()
// We expect to get this status sometimes because of edgehub restarts, but if we receive too many we should fail the tests.
// TODO: When the SDK allows edgehub to de-register from subscriptions and we make the fix in edgehub, then we can fail tests for any status code 0.
long allStatusCount = this.SenderSuccesses + this.StatusCodeZero + this.Other.Sum(x => x.Value);
bool statusCodeZeroBelowThreshold = (this.StatusCodeZero == 0) || (this.StatusCodeZero < ((double)allStatusCount / 100));
bool statusCodeZeroBelowThreshold = (this.StatusCodeZero == 0) || (this.StatusCodeZero < ((double)allStatusCount / 1000));
bool deviceNotFoundBelowThreshold = (this.DeviceNotFound == 0) || (this.DeviceNotFound < ((double)allStatusCount / 1000));

// Pass if status code zero is below the threshold, and sender and receiver got the same number of successes (or receiver has no results)
return statusCodeZeroBelowThreshold && senderAndReceiverSuccessesPass;
return statusCodeZeroBelowThreshold && deviceNotFoundBelowThreshold && senderAndReceiverSuccessesPass;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ public async Task<ITestResultReport> CreateReportAsync()
long senderSuccesses = 0;
long receiverSuccesses = 0;
long statusCodeZero = 0;
long deviceNotFound = 0;
Dictionary<HttpStatusCode, long> other = new Dictionary<HttpStatusCode, long>();
while (await this.SenderTestResults.MoveNextAsync())
{
Expand All @@ -69,6 +70,9 @@ public async Task<ITestResultReport> CreateReportAsync()
case 200:
senderSuccesses++;
break;
case 404:
deviceNotFound++;
break;
default:
if (other.ContainsKey(statusCode))
{
Expand Down Expand Up @@ -102,6 +106,7 @@ public async Task<ITestResultReport> CreateReportAsync()
senderSuccesses,
receiverSuccesses,
statusCodeZero,
deviceNotFound,
other);
}

Expand Down

0 comments on commit bbebd3e

Please sign in to comment.