Plumb test failures through to github (#15831)

zadjii-msft · web-flow · commit 8f4c63e81be8 · 2023-08-15T09:50:15.000-05:00
This does three things:

1. Correctly marks our tests as failed in xUnit, so that AzDo will pick up that the tests have failed.
2. Actually intentionally marks skipped tests as skipped in xUnit. We were doing this accidentally before.
3. Adds a CI step to log test failures in a way that they can show up on GitHub.

Probably regressed around #6992 and #4490.

### Details

#### Part the first

We were relying on the MUX build scripts to convert our WTT test logs to xUnit format, which AzDo then ingests. That script we used relied on some WinUI-specific logic around retrying tests. They have some logic to auto-retry failed tests. They then mark a failed test as "skipped" (rather than failed) if its reruns passed at least some threshold number of times. Since we were never setting that variable, we would mark a test as "skipped" even if it had _0_ passes. So, all failures showed up on AzDo as "skipped".

Why didn't we notice this? Well, the `Run-Tests.ps1` script will still return `1` if _any_ tests failed. So the test job would fail if there was a failure; AzDo just wouldn't know which test it was.

#### Part the second

Updates `ConvertWttLogToXUnitLog` in `HelixTestHelpers.cs` to understand that a test can be skipped, in addition to pass/fail. Removes all the logic for dealing with retries, because we didn't need that.

#### Part the third

TAEF doesn't emit error messages in a way that AzDo can immediately pick up on which tests failed. This means that GitHub gives us this useless error message:

![image](https://github.com/microsoft/terminal/assets/18356694/3be6de00-22e1-421c-93d4-176bd2be4cab)

That's the only "error" that AzDo knows about. This PR changes that by adding a build step to manually parse the xUnit results, and log the names of any tests that failed. By logging them with a prefix of `##vso[task.logissue type=error]`, AzDo will surface that text as an error message. GitHub can then grab that text and surface it too.
### Addenda: Why aren't we using the VsTest module?

As noted in #4490 (comment), the VsTest module is literally 6x slower than just running TAEF directly.
diff --git a/build/Helix/ConvertWttLogToXUnit.ps1 b/build/Helix/ConvertWttLogToXUnit.ps1
@@ -20,9 +20,7 @@ Param(
 $helixResultsContainerUri = $Env:HELIX_RESULTS_CONTAINER_URI
 $helixResultsContainerRsas = $Env:HELIX_RESULTS_CONTAINER_RSAS
 
-$rerunPassesRequiredToAvoidFailure = $env:rerunPassesRequiredToAvoidFailure
-
 Add-Type -Language CSharp -ReferencedAssemblies System.Xml,System.Xml.Linq,System.Runtime.Serialization,System.Runtime.Serialization.Json (Get-Content $PSScriptRoot\HelixTestHelpers.cs -Raw)
 
 $testResultParser = [HelixTestHelpers.TestResultParser]::new($TestNamePrefix, $helixResultsContainerUri, $helixResultsContainerRsas)
-$testResultParser.ConvertWttLogToXUnitLog($WttInputPath, $WttSingleRerunInputPath, $WttMultipleRerunInputPath, $XUnitOutputPath, $rerunPassesRequiredToAvoidFailure)
+$testResultParser.ConvertWttLogToXUnitLog($WttInputPath, $WttSingleRerunInputPath, $WttMultipleRerunInputPath, $XUnitOutputPath)
diff --git a/build/Helix/HelixTestHelpers.cs b/build/Helix/HelixTestHelpers.cs
@@ -20,32 +20,13 @@ public TestResult()
         public string Name { get; set; }
         public string SourceWttFile { get; set; }
         public bool Passed { get; set; }
+        public bool Skipped { get; set; }
         public bool CleanupPassed { get; set; }
         public TimeSpan ExecutionTime { get; set; }
         public string Details { get; set; }
 
         public List<string> Screenshots { get; private set; }
         public List<TestResult> RerunResults { get; private set; }
-
-        // Returns true if the test pass rate is sufficient to avoid being counted as a failure.
-        public bool PassedOrUnreliable(int requiredNumberOfPasses)
-        {
-            if(Passed)
-            {
-                return true;
-            }
-            else
-            {
-                if(RerunResults.Count == 1)
-                {
-                    return RerunResults[0].Passed;
-                }
-                else
-                {
-                    return RerunResults.Where(r => r.Passed).Count() >= requiredNumberOfPasses;
-                }
-            }
-        }
     }
     
     //
@@ -221,7 +202,9 @@ public static TestPass ParseTestWttFile(string fileName, bool cleanupFailuresAre
                         testsExecuting--;
 
                         // If any inner test fails, we'll still fail the outer
-                        currentResult.Passed &= element.Attribute("Result").Value == "Pass";
+                        var value = element.Attribute("Result").Value;
+                        currentResult.Passed = value == "Pass";
+                        currentResult.Skipped = value == "Skipped";
 
                         // Only gather execution data if this is the outer test we ran initially
                         if (testsExecuting == 0)
@@ -498,7 +481,7 @@ public Dictionary<string, string> GetSubResultsJsonByMethodName(string wttInputP
             return subResultsJsonByMethod;
         }
 
-        public void ConvertWttLogToXUnitLog(string wttInputPath, string wttSingleRerunInputPath, string wttMultipleRerunInputPath, string xunitOutputPath, int requiredPassRateThreshold)
+        public void ConvertWttLogToXUnitLog(string wttInputPath, string wttSingleRerunInputPath, string wttMultipleRerunInputPath, string xunitOutputPath)
         {
             TestPass testPass = TestPass.ParseTestWttFileWithReruns(wttInputPath, wttSingleRerunInputPath, wttMultipleRerunInputPath, cleanupFailuresAreRegressions: true, truncateTestNames: false);
             var results = testPass.TestResults;
@@ -510,8 +493,8 @@ public void ConvertWttLogToXUnitLog(string wttInputPath, string wttSingleRerunIn
             // If the test failed sufficiently often enough for it to count as a failed test (determined by a property on the
             // Azure DevOps job), we'll later mark it as failed during test results processing.
 
-            int failedCount = results.Where(r => !r.PassedOrUnreliable(requiredPassRateThreshold)).Count();
-            int skippedCount = results.Where(r => !r.Passed && r.PassedOrUnreliable(requiredPassRateThreshold)).Count();
+            int failedCount = results.Where(r => !r.Passed).Count();
+            int skippedCount = results.Where(r => (!r.Passed && r.Skipped)).Count();
 
             var root = new XElement("assemblies");
 
@@ -557,12 +540,13 @@ public void ConvertWttLogToXUnitLog(string wttInputPath, string wttSingleRerunIn
                 
                 string resultString = string.Empty;
                 
-                if (result.Passed)
+                if (result.Passed && !result.Skipped)
                 {
                     resultString = "Pass";
                 }
-                else if(result.PassedOrUnreliable(requiredPassRateThreshold))
+                else if (result.Skipped)
                 {
+
                     resultString = "Skip";
                 }
                 else
@@ -571,31 +555,25 @@ public void ConvertWttLogToXUnitLog(string wttInputPath, string wttSingleRerunIn
                 }
 
                 
-                test.SetAttributeValue("result", resultString);
-
                 if (!result.Passed)
                 {
-                    // If a test failed, we'll have rerun it multiple times.
-                    // We'll save the subresults to a JSON text file that we'll upload to the helix results container -
-                    // this allows it to be as long as we want, whereas the reason field in Azure DevOps has a 4000 character limit.
-                    string subResultsFileName = methodName + "_subresults.json";
-                    string subResultsFilePath = Path.Combine(Path.GetDirectoryName(wttInputPath), subResultsFileName);
-					
-                    if (result.PassedOrUnreliable(requiredPassRateThreshold))
+                    if (result.Skipped)
                     {
                         var reason = new XElement("reason");
-                        reason.Add(new XCData(GetUploadedFileUrl(subResultsFileName, helixResultsContainerUri, helixResultsContainerRsas)));
+                        reason.Add(new XCData("Test skipped"));
                         test.Add(reason);
                     }
-                    else
-                    {
+                    else {
                         var failure = new XElement("failure");
                         var message = new XElement("message");
-						message.Add(new XCData(GetUploadedFileUrl(subResultsFileName, helixResultsContainerUri, helixResultsContainerRsas)));
+                        message.Add(new XCData("Test failed"));
                         failure.Add(message);
                         test.Add(failure);
                     }
                 }
+
+                test.SetAttributeValue("result", resultString);
+
                 collection.Add(test);
             }
 
diff --git a/build/Helix/OutputTestErrorsForAzureDevops.ps1 b/build/Helix/OutputTestErrorsForAzureDevops.ps1
@@ -0,0 +1,38 @@
+Param(
+    [Parameter(Mandatory = $true)]
+    [string]$XUnitOutputPath
+)
+
+# This script parses the xUnit output from the test runs and prints out the
+# tests that failed.
+#
+# Why, you might ask? Well, it sure seems like Azure DevOps doesn't like the fact
+# that we just call our tests in a PowerShell script. It seemingly can't find
+# the actual errors in the TAEF logs. That means when you just go to the
+# "Checks" page on GitHub, the Azure DevOps integration doesn't have anything
+# meaningful to say other than "PowerShell exited with code '1'". If, however,
+# we manually emit the test names prefixed with "##vso[task.logissue type=error]",
+# then the integration can surface each failed test as an error message.
+
+# Load the test results as an XML object
+$testResults = [xml](Get-Content -Path $XUnitOutputPath)
+
+# Our XML looks like:
+# <assemblies>
+#   <assembly name="MUXControls.Test.dll" test-framework="TAEF" run-date="2023-08-14" run-time="11:38:01" total="524" passed="520" failed="4" skipped="1" time="8943" errors="0">
+#     <collection total="524" passed="520" failed="4" skipped="1" name="Test collection" time="8943">
+#       <test name="ControlCoreTests::TestSimpleClickSelection" type="ControlCoreTests" method="TestSimpleClickSelection" time="0.016" result="Fail">
+
+# Walk every assembly -> collection -> test and log each test whose result is "Fail"
+foreach ($assembly in $testResults.assemblies.assembly) {
+    foreach ($collection in $assembly.collection) {
+        foreach ($test in $collection.test) {
+            if ($test.result -eq "Fail") {
+                # The "##vso[task.logissue type=error]" prefix is taken from the Azure DevOps documentation:
+                # https://github.com/microsoft/azure-pipelines-tasks/blob/master/docs/authoring/commands.md
+                # Azure DevOps will treat the emitted line as an error message
+                Write-Output "##vso[task.logissue type=error]$($test.name) Failed"
+            }
+        }
+    }
+}
diff --git a/build/pipelines/templates-v2/job-test-project.yml b/build/pipelines/templates-v2/job-test-project.yml
@@ -65,6 +65,14 @@ jobs:
       arguments: -WttInputPath '${{ parameters.testLogPath }}' -WttSingleRerunInputPath 'unused.wtl' -WttMultipleRerunInputPath 'unused2.wtl' -XUnitOutputPath 'onBuildMachineResults.xml' -TestNamePrefix '$(BuildConfiguration).$(BuildPlatform)'
     condition: ne(variables['PGOBuildMode'], 'Instrument')
 
+  - task: PowerShell@2
+    displayName: 'Manually log test failures'
+    inputs:
+      targetType: filePath
+      filePath: build\Helix\OutputTestErrorsForAzureDevops.ps1
+      arguments: -XUnitOutputPath 'onBuildMachineResults.xml'
+    condition: ne(variables['PGOBuildMode'], 'Instrument')
+
   - task: PublishTestResults@2
     displayName: 'Upload converted test logs'
     condition: ne(variables['PGOBuildMode'], 'Instrument')