From bbebd3e4186912aa70666201c0af0ca34b68ea73 Mon Sep 17 00:00:00 2001 From: Andrew Smith Date: Tue, 23 Mar 2021 16:09:39 -0700 Subject: [PATCH] Longhaul: Add threshold for device not found (#4642) Longhaul is seeing DeviceNotFound exceptions. We didn't used to see this. Bear and I think that this is because either: - EdgeHub is taking a long time to restart - The exception's transient field was flipped from true to false by the sdk The fix here is to implement a tolerance to these exceptions, similarly to how we do with status code 0. --- .../DirectMethodSenderBase.cs | 7 ++++- .../DirectMethodLongHaulReportData.cs | 29 +++++++++++++++---- ...DirectMethodLongHaulReportGeneratorTest.cs | 8 +++-- .../LongHaul/DirectMethodLongHaulReport.cs | 9 ++++-- .../DirectMethodLongHaulReportGenerator.cs | 5 ++++ 5 files changed, 47 insertions(+), 11 deletions(-) diff --git a/test/modules/DirectMethodSender/DirectMethodSenderBase.cs b/test/modules/DirectMethodSender/DirectMethodSenderBase.cs index 6fbb8b1be98..29224dc32a1 100644 --- a/test/modules/DirectMethodSender/DirectMethodSenderBase.cs +++ b/test/modules/DirectMethodSender/DirectMethodSenderBase.cs @@ -54,7 +54,7 @@ public async Task> InvokeDirectMethodAsync(string m } catch (DeviceNotFoundException e) { - logger.LogInformation(e, $"Transient exception caught with count {this.directMethodCount}"); + logger.LogInformation(e, $"DeviceNotFound exception caught with count {this.directMethodCount}"); return new Tuple(HttpStatusCode.NotFound, this.directMethodCount); } catch (Exception e) @@ -64,6 +64,11 @@ public async Task> InvokeDirectMethodAsync(string m } } + // Retry is needed here because sometimes the test agents have transient + // network issues. Ideally, we would just send the report to the TRC and + // have it analyze whether it is a pass or fail. However the transient + // exceptions don't have status codes which adds complication. This retry + // approach is easier. async Task InvokeDirectMethodWithRetryAsync( ILogger logger, string deviceId, diff --git a/test/modules/Modules.Test/TestResultCoordinator/Reports/DirectMethod/DirectMethodLongHaulReportData.cs b/test/modules/Modules.Test/TestResultCoordinator/Reports/DirectMethod/DirectMethodLongHaulReportData.cs index ccec179b4dc..ce0be9e0a68 100644 --- a/test/modules/Modules.Test/TestResultCoordinator/Reports/DirectMethod/DirectMethodLongHaulReportData.cs +++ b/test/modules/Modules.Test/TestResultCoordinator/Reports/DirectMethod/DirectMethodLongHaulReportData.cs @@ -28,7 +28,7 @@ class DirectMethodLongHaulReportData }, 7, true, - 7, 0, 0 + 7, 0, 0, 0 }, new object[] { @@ -47,7 +47,7 @@ class DirectMethodLongHaulReportData }, 7, false, - 6, 0, 1 + 6, 0, 0, 1 }, new object[] { @@ -66,7 +66,26 @@ class DirectMethodLongHaulReportData }, 7, false, - 6, 1, 0 + 6, 1, 0, 0 + }, + new object[] + { + Enumerable.Range(1, 7).Select(v => (ulong)v), + Enumerable.Range(1, 7).Select(v => (ulong)v), + new List { HttpStatusCode.OK, HttpStatusCode.NotFound, HttpStatusCode.OK, HttpStatusCode.OK, HttpStatusCode.OK, HttpStatusCode.OK, HttpStatusCode.OK }, + new DateTime[] + { + new DateTime(2020, 1, 1, 9, 10, 12, 10), + new DateTime(2020, 1, 1, 9, 10, 13, 10), + new DateTime(2020, 1, 1, 9, 10, 21, 10), + new DateTime(2020, 1, 1, 9, 10, 22, 10), + new DateTime(2020, 1, 1, 9, 10, 23, 10), + new DateTime(2020, 1, 1, 9, 10, 24, 10), + new DateTime(2020, 1, 1, 9, 10, 24, 15) + }, + 7, + false, + 6, 0, 1, 0 }, }; @@ -75,7 +94,7 @@ class DirectMethodLongHaulReportData { Enumerable.Range(1, 7).Select(v => (ulong)v), Enumerable.Range(1, 7).Select(v => (ulong)v), - new List { HttpStatusCode.OK, (HttpStatusCode)Enum.Parse(typeof(HttpStatusCode), "0"), HttpStatusCode.InternalServerError, HttpStatusCode.OK, HttpStatusCode.ServiceUnavailable, HttpStatusCode.InternalServerError, HttpStatusCode.OK }, + new List { HttpStatusCode.OK, (HttpStatusCode)Enum.Parse(typeof(HttpStatusCode), "0"), HttpStatusCode.InternalServerError, HttpStatusCode.NotFound, HttpStatusCode.ServiceUnavailable, HttpStatusCode.InternalServerError, HttpStatusCode.OK }, new DateTime[] { new DateTime(2020, 1, 1, 9, 10, 12, 10), @@ -88,7 +107,7 @@ class DirectMethodLongHaulReportData }, 7, false, - 3L, 1L, + 2L, 1L, 1L, new Dictionary { { HttpStatusCode.InternalServerError, 2 }, { HttpStatusCode.ServiceUnavailable, 1 } } }; } diff --git a/test/modules/Modules.Test/TestResultCoordinator/Reports/DirectMethod/DirectMethodLongHaulReportGeneratorTest.cs b/test/modules/Modules.Test/TestResultCoordinator/Reports/DirectMethod/DirectMethodLongHaulReportGeneratorTest.cs index 417e7c8d49c..cffc15bf46b 100644 --- a/test/modules/Modules.Test/TestResultCoordinator/Reports/DirectMethod/DirectMethodLongHaulReportGeneratorTest.cs +++ b/test/modules/Modules.Test/TestResultCoordinator/Reports/DirectMethod/DirectMethodLongHaulReportGeneratorTest.cs @@ -173,6 +173,7 @@ public async Task TestCreateReportAsync( bool expectedIsPassed, long expectedOk, long expectedStatusCodeZero, + long expectedStatusCodeNotFound, long expectedOther) { string senderSource = "senderSource"; @@ -213,11 +214,12 @@ public async Task TestCreateReportAsync( Assert.Equal(expectedIsPassed, report.IsPassed); Assert.Equal(expectedOk, report.SenderSuccesses); Assert.Equal(expectedStatusCodeZero, report.StatusCodeZero); + Assert.Equal(expectedStatusCodeNotFound, report.DeviceNotFound); Assert.Equal(expectedOther, report.Other.Sum(x => x.Value)); } [Fact] - public async Task TestOtherStatusCodeOrder() + public async Task TestOtherStatusCodeCounts() { var x = DirectMethodLongHaulReportData.GetStatusCodeTestData; IEnumerable senderStoreValues = (IEnumerable)x[0]; @@ -228,7 +230,8 @@ public async Task TestOtherStatusCodeOrder() bool expectedIsPassed = (bool)x[5]; long expectedOk = (long)x[6]; long expectedStatusCodeZero = (long)x[7]; - Dictionary expectedOtherDict = (Dictionary)x[8]; + long expectedStatusCodeNotFound = (long)x[8]; + Dictionary expectedOtherDict = (Dictionary)x[9]; string senderSource = "senderSource"; string receiverSource = "receiverSource"; @@ -268,6 +271,7 @@ public async Task TestOtherStatusCodeOrder() Assert.Equal(expectedIsPassed, report.IsPassed); Assert.Equal(expectedOk, report.SenderSuccesses); Assert.Equal(expectedStatusCodeZero, report.StatusCodeZero); + Assert.Equal(expectedStatusCodeNotFound, report.DeviceNotFound); Assert.Equal(expectedOtherDict.Sum(x => x.Value), report.Other.Sum(x => x.Value)); Assert.Equal(expectedOtherDict[HttpStatusCode.ServiceUnavailable], report.Other[HttpStatusCode.ServiceUnavailable]); Assert.Equal(expectedOtherDict[HttpStatusCode.InternalServerError], report.Other[HttpStatusCode.InternalServerError]); diff --git a/test/modules/TestResultCoordinator/Reports/DirectMethod/LongHaul/DirectMethodLongHaulReport.cs b/test/modules/TestResultCoordinator/Reports/DirectMethod/LongHaul/DirectMethodLongHaulReport.cs index 09b1f62dedf..8fafafea081 100644 --- a/test/modules/TestResultCoordinator/Reports/DirectMethod/LongHaul/DirectMethodLongHaulReport.cs +++ b/test/modules/TestResultCoordinator/Reports/DirectMethod/LongHaul/DirectMethodLongHaulReport.cs @@ -1,7 +1,6 @@ // Copyright (c) Microsoft. All rights reserved. namespace TestResultCoordinator.Reports.DirectMethod.LongHaul { - using System; using System.Collections.Generic; using System.Linq; using System.Net; @@ -18,6 +17,7 @@ public DirectMethodLongHaulReport( long senderSuccesses, long receiverSuccesses, long statusCodeZero, + long deviceNotFound, Dictionary other) : base(testDescription, trackingId, resultType) { @@ -26,6 +26,7 @@ public DirectMethodLongHaulReport( this.SenderSuccesses = senderSuccesses; this.ReceiverSuccesses = receiverSuccesses; this.StatusCodeZero = statusCodeZero; + this.DeviceNotFound = deviceNotFound; this.Other = other; } @@ -34,6 +35,7 @@ public DirectMethodLongHaulReport( public long SenderSuccesses { get; } public long ReceiverSuccesses { get; } public long StatusCodeZero { get; } + public long DeviceNotFound { get; } public Dictionary Other { get; } public override string Title => $"DirectMethod LongHaul Report for [{this.SenderSource}] and [{this.ReceiverSource}] ({this.ResultType})"; @@ -54,10 +56,11 @@ public bool IsPassedHelper() // We expect to get this status sometimes because of edgehub restarts, but if we receive too many we should fail the tests. // TODO: When the SDK allows edgehub to de-register from subscriptions and we make the fix in edgehub, then we can fail tests for any status code 0. long allStatusCount = this.SenderSuccesses + this.StatusCodeZero + this.Other.Sum(x => x.Value); - bool statusCodeZeroBelowThreshold = (this.StatusCodeZero == 0) || (this.StatusCodeZero < ((double)allStatusCount / 100)); + bool statusCodeZeroBelowThreshold = (this.StatusCodeZero == 0) || (this.StatusCodeZero < ((double)allStatusCount / 1000)); + bool deviceNotFoundBelowThreshold = (this.DeviceNotFound == 0) || (this.DeviceNotFound < ((double)allStatusCount / 1000)); // Pass if status code zero is below the threshold, and sender and receiver got same amount of successess (or receiver has no results) - return statusCodeZeroBelowThreshold && senderAndReceiverSuccessesPass; + return statusCodeZeroBelowThreshold && deviceNotFoundBelowThreshold && senderAndReceiverSuccessesPass; } } } diff --git a/test/modules/TestResultCoordinator/Reports/DirectMethod/LongHaul/DirectMethodLongHaulReportGenerator.cs b/test/modules/TestResultCoordinator/Reports/DirectMethod/LongHaul/DirectMethodLongHaulReportGenerator.cs index dcd84bfe358..dd4f323c00f 100644 --- a/test/modules/TestResultCoordinator/Reports/DirectMethod/LongHaul/DirectMethodLongHaulReportGenerator.cs +++ b/test/modules/TestResultCoordinator/Reports/DirectMethod/LongHaul/DirectMethodLongHaulReportGenerator.cs @@ -55,6 +55,7 @@ public async Task CreateReportAsync() long senderSuccesses = 0; long receiverSuccesses = 0; long statusCodeZero = 0; + long deviceNotFound = 0; Dictionary other = new Dictionary(); while (await this.SenderTestResults.MoveNextAsync()) { @@ -69,6 +70,9 @@ public async Task CreateReportAsync() case 200: senderSuccesses++; break; + case 404: + deviceNotFound++; + break; default: if (other.ContainsKey(statusCode)) { @@ -102,6 +106,7 @@ public async Task CreateReportAsync() senderSuccesses, receiverSuccesses, statusCodeZero, + deviceNotFound, other); }