Skip to content

Commit

Permalink
Check for crash dumps on workflow failure (microsoft#2327)
Browse files Browse the repository at this point in the history
* Check for crash dumps on workflow failure.
Fastfail the process on watchdog timeout.

Signed-off-by: Alan Jowett <alanjo@microsoft.com>

* Remove intrin.h to prevent conflict with Windows SDK.

Signed-off-by: Alan Jowett <alanjo@microsoft.com>

* Suppress failfast during unit testing

Signed-off-by: Alan Jowett <alanjo@microsoft.com>

---------

Signed-off-by: Alan Jowett <alanjo@microsoft.com>
  • Loading branch information
Alan-Jowett authored Apr 14, 2023
1 parent 3b86a71 commit 9a4853a
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 17 deletions.
23 changes: 15 additions & 8 deletions .github/workflows/reusable-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,8 @@ jobs:
${{env.TEST_COMMAND}}
- name: Run post test command
if: (steps.skip_check.outputs.should_skip != 'true') && always()
# Run the post test command even if the workflow has failed.
if: (success() || failure()) && (steps.skip_check.outputs.should_skip != 'true')
id: run_post_test_command
working-directory: ./${{env.BUILD_PLATFORM}}/${{env.BUILD_CONFIGURATION}}
run: |
Expand Down Expand Up @@ -278,7 +279,7 @@ jobs:
root_dir: ${{github.workspace}}

- name: Fail if code coverage upload fails
if: (steps.skip_check.outputs.should_skip != 'true') && always()
if: (steps.skip_check.outputs.should_skip != 'true')
run: |
if ($${{ steps.test.upload_code_coverage_report_1!='failure' }}) { exit 0 }
if ($${{ steps.test.upload_code_coverage_report_2!='failure' }}) { exit 0 }
Expand All @@ -299,14 +300,16 @@ jobs:
copy ${{github.workspace}}\${{env.BUILD_PLATFORM}}\${{env.BUILD_CONFIGURATION}}\*.log ${{github.workspace}}\${{env.BUILD_PLATFORM}}\${{env.BUILD_CONFIGURATION}}\TestLogs
- name: Check for crash dumps
if: steps.skip_check.outputs.should_skip != 'true'
# Check for crash dumps even if the workflow failed.
if: (success() || failure()) && (steps.skip_check.outputs.should_skip != 'true')
uses: andstor/file-existence-action@20b4d2e596410855db8f9ca21e96fbe18e12930b
id: check_dumps
with:
files: c:/dumps/${{env.BUILD_PLATFORM}}/${{env.BUILD_CONFIGURATION}}/*.dmp

- name: Upload any crash dumps
if: (steps.skip_check.outputs.should_skip != 'true') && always() && (steps.check_dumps.outputs.files_exists == 'true') && (inputs.gather_dumps == true)
# Upload crash dumps even if the workflow failed.
if: (success() || failure()) && (steps.skip_check.outputs.should_skip != 'true') && (steps.check_dumps.outputs.files_exists == 'true') && (inputs.gather_dumps == true)
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
id: upload_crash_dumps
with:
Expand All @@ -315,14 +318,16 @@ jobs:
retention-days: 5

- name: Check for TestLogs
# Check for test logs even if the workflow failed.
uses: andstor/file-existence-action@20b4d2e596410855db8f9ca21e96fbe18e12930b
if: (steps.skip_check.outputs.should_skip != 'true') && always()
if: (success() || failure()) && (steps.skip_check.outputs.should_skip != 'true')
id: check_logs
with:
files: ./${{env.BUILD_PLATFORM}}/${{env.BUILD_CONFIGURATION}}/TestLogs/*

- name: Upload log files
if: (steps.skip_check.outputs.should_skip != 'true') && always() && (steps.check_logs.outputs.files_exists == 'true')
# Upload test logs even if the workflow failed.
if: (success() || failure()) && (steps.skip_check.outputs.should_skip != 'true') && (steps.check_logs.outputs.files_exists == 'true')
id: upload_logs
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
with:
Expand All @@ -331,14 +336,16 @@ jobs:
retention-days: 5

- name: Check for generated artifacts
if: (steps.skip_check.outputs.should_skip != 'true') && always()
# Check for artifacts even if the workflow failed.
if: (success() || failure()) && (steps.skip_check.outputs.should_skip != 'true')
uses: andstor/file-existence-action@20b4d2e596410855db8f9ca21e96fbe18e12930b
id: check_artifacts
with:
files: ${{github.workspace}}\${{env.BUILD_PLATFORM}}\${{env.BUILD_CONFIGURATION}}\Artifacts\*

- name: Upload generated artifacts
if: (steps.skip_check.outputs.should_skip != 'true') && always() && (steps.check_artifacts.outputs.files_exists == 'true')
# Upload artifacts even if the workflow failed.
if: (success() || failure()) && (steps.skip_check.outputs.should_skip != 'true') && (steps.check_artifacts.outputs.files_exists == 'true')
id: upload_artifacts
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
with:
Expand Down
16 changes: 10 additions & 6 deletions tests/libs/common/ebpf_watchdog_timer.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <errorrep.h>
#include <stdexcept>
#include <stdint.h>
#include <stdlib.h>
#include <werapi.h>

#pragma comment(lib, "wer.lib")
Expand All @@ -17,18 +18,19 @@

/**
* @brief A watchdog timer that triggers a memory dump if the test takes too long.
*
* @tparam raise_fast_fail_on_timeout If true, the test will be terminated with a fast fail.
*/
typedef class _ebpf_watchdog_timer
template <bool raise_fast_fail_on_timeout> class _ebpf_watchdog_timer
{
public:
_ebpf_watchdog_timer(int64_t timeout = EBPF_WATCHDOG_TIMER_DUE_TIME_IN_SECONDS * FILETIME_TICKS_PER_SECOND)
{
timer = CreateThreadpoolTimer(
[](_Inout_ PTP_CALLBACK_INSTANCE, _Inout_opt_ PVOID, _Inout_ PTP_TIMER) {
// Attempt to generate a WER report and raise an assertion failure if that fails.
if (!generate_wer_report()) {
// This will cause the vectored exception handler to be called.
RaiseException(STATUS_ASSERTION_FAILURE, 0, 0, NULL);
generate_wer_report();
if constexpr (raise_fast_fail_on_timeout) {
__fastfail(FAST_FAIL_FATAL_APP_EXIT);
}
},
NULL,
Expand Down Expand Up @@ -73,4 +75,6 @@ typedef class _ebpf_watchdog_timer
}
static constexpr const wchar_t wer_event_type[] = L"Test Application Hang";
PTP_TIMER timer;
} ebpf_watchdog_timer_t;
};

typedef _ebpf_watchdog_timer<true> ebpf_watchdog_timer_t;
4 changes: 2 additions & 2 deletions tests/libs/common/watchdog.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class _watchdog : public Catch::EventListenerBase
void
testCaseStarting(Catch::TestCaseInfo const& /*testCaseInfo*/) override
{
_watchdog_timer = std::make_unique<_ebpf_watchdog_timer>();
_watchdog_timer = std::make_unique<_ebpf_watchdog_timer<true>>();
}

// Log failed tests.
Expand All @@ -31,5 +31,5 @@ class _watchdog : public Catch::EventListenerBase
}

private:
std::unique_ptr<_ebpf_watchdog_timer> _watchdog_timer;
std::unique_ptr<_ebpf_watchdog_timer<true>> _watchdog_timer;
};
2 changes: 1 addition & 1 deletion tests/unit/wer_report_test_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ TEST_CASE("watchdog_timeout", "[wer_report]")
WerReportAddDump_test_expected_exception_param = false;

// Expire the watchdog timer.
_ebpf_watchdog_timer_test watchdog_timer(1);
_ebpf_watchdog_timer<false> watchdog_timer(1);
Sleep(1000);

// Verify that the WER APIs are all called.
Expand Down

0 comments on commit 9a4853a

Please sign in to comment.