Skip to content

Feat: support retries in e2e tests in CI #2145

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Nov 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 23 additions & 12 deletions .github/workflows/backward-compatibility.yml
Original file line number Diff line number Diff line change
Expand Up @@ -119,18 +119,29 @@ jobs:
mkdir artifacts

- name: Run E2E tests
shell: bash
run: |
./mithril-binaries/e2e/mithril-end-to-end -vvv \
--bin-directory ./mithril-binaries/e2e \
--work-directory=./artifacts \
--devnet-scripts-directory=./mithril-test-lab/mithril-devnet \
--cardano-node-version ${{ matrix.cardano_node_version }} \
--cardano-slot-length 0.25 \
--cardano-epoch-length 45.0 \
--signed-entity-types ${{ needs.prepare-env-variables.outputs.signed-entity-types }} \
&& echo "SUCCESS=true" >> $GITHUB_ENV \
|| (echo "SUCCESS=false" >> $GITHUB_ENV && exit 1)
uses: nick-fields/retry@v3
with:
shell: bash
max_attempts: 3
retry_on_exit_code: 2
timeout_minutes: 10
warning_on_retry: true
command: |
./mithril-binaries/e2e/mithril-end-to-end -vvv \
--bin-directory ./mithril-binaries/e2e \
--work-directory=./artifacts \
--devnet-scripts-directory=./mithril-test-lab/mithril-devnet \
--cardano-node-version ${{ matrix.cardano_node_version }} \
--cardano-slot-length 0.25 \
--cardano-epoch-length 45.0 \
--signed-entity-types ${{ needs.prepare-env-variables.outputs.signed-entity-types }}
EXIT_CODE=$?
if [ $EXIT_CODE -eq 0 ]; then
echo "SUCCESS=true" >> $GITHUB_ENV
else
echo "SUCCESS=false" >> $GITHUB_ENV
fi
exit $EXIT_CODE

- name: Define the JSON file name for the test result
shell: bash
Expand Down
47 changes: 28 additions & 19 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -347,25 +347,34 @@ jobs:
mkdir artifacts

- name: Test
run: |
cat > ./mithril-end-to-end.sh << EOF
#!/bin/bash
set -x
./mithril-end-to-end -vvv \\
--bin-directory ./bin \\
--work-directory=./artifacts \\
--devnet-scripts-directory=./mithril-test-lab/mithril-devnet \\
--mithril-era=${{ matrix.era }} \\
--cardano-node-version ${{ matrix.cardano_node_version }} \\
--cardano-hard-fork-latest-era-at-epoch ${{ matrix.hard_fork_latest_era_at_epoch }} ${{ matrix.extra_args }} \\
EOF
# If there is a next era, we need to specify it with '--mithril-next-era'
if [[ "${{ matrix.next_era }}" != "" ]]; then
echo " --mithril-next-era=${{ matrix.next_era }}" >> ./mithril-end-to-end.sh
fi
chmod u+x ./mithril-end-to-end.sh
./mithril-end-to-end.sh
rm ./mithril-end-to-end.sh
uses: nick-fields/retry@v3
with:
shell: bash
max_attempts: 3
retry_on_exit_code: 2
timeout_minutes: 10
warning_on_retry: true
command: |
cat > ./mithril-end-to-end.sh << EOF
#!/bin/bash
set -x
./mithril-end-to-end -vvv \\
--bin-directory ./bin \\
--work-directory=./artifacts \\
--devnet-scripts-directory=./mithril-test-lab/mithril-devnet \\
--mithril-era=${{ matrix.era }} \\
--cardano-node-version ${{ matrix.cardano_node_version }} \\
--cardano-hard-fork-latest-era-at-epoch ${{ matrix.hard_fork_latest_era_at_epoch }} ${{ matrix.extra_args }} \\
EOF
# If there is a next era, we need to specify it with '--mithril-next-era'
if [[ "${{ matrix.next_era }}" != "" ]]; then
echo " --mithril-next-era=${{ matrix.next_era }}" >> ./mithril-end-to-end.sh
fi
chmod u+x ./mithril-end-to-end.sh
./mithril-end-to-end.sh
EXIT_CODE=$?
rm ./mithril-end-to-end.sh
exit $EXIT_CODE

- name: Upload E2E Tests Artifacts
if: ${{ failure() }}
Expand Down
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion mithril-test-lab/mithril-end-to-end/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "mithril-end-to-end"
version = "0.4.49"
version = "0.4.50"
authors = { workspace = true }
edition = { workspace = true }
documentation = { workspace = true }
Expand Down
2 changes: 1 addition & 1 deletion mithril-test-lab/mithril-end-to-end/src/devnet/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
mod runner;

pub use runner::{Devnet, DevnetBootstrapArgs, DevnetTopology, PoolNode};
pub use runner::{Devnet, DevnetBootstrapArgs, DevnetTopology, PoolNode, RetryableDevnetError};
21 changes: 15 additions & 6 deletions mithril-test-lab/mithril-end-to-end/src/devnet/runner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,13 @@ use std::fs::{self, read_to_string, File};
use std::io::Read;
use std::path::{Path, PathBuf};
use std::process::Stdio;
use thiserror::Error;
use tokio::process::Command;

/// Error raised when a devnet script exits with a non-zero status code.
///
/// The wrapped string is the human-readable failure message (for example
/// "Run devnet exited with status code: 1"). The end-to-end binary checks for this
/// type to classify a failed run as retryable.
#[derive(Error, Debug, PartialEq, Eq)]
#[error("Retryable devnet error: `{0}`")]
pub struct RetryableDevnetError(pub String);

#[derive(Debug, Clone, Default)]
pub struct Devnet {
artifacts_dir: PathBuf,
Expand Down Expand Up @@ -211,7 +216,9 @@ impl Devnet {
.with_context(|| "Error while starting the devnet")?;
match status.code() {
Some(0) => Ok(()),
Some(code) => Err(anyhow!("Run devnet exited with status code: {code}")),
Some(code) => Err(anyhow!(RetryableDevnetError(format!(
"Run devnet exited with status code: {code}"
)))),
None => Err(anyhow!("Run devnet terminated by signal")),
}
}
Expand Down Expand Up @@ -258,7 +265,9 @@ impl Devnet {
.with_context(|| "Error while delegating stakes to the pools")?;
match status.code() {
Some(0) => Ok(()),
Some(code) => Err(anyhow!("Delegating stakes exited with status code: {code}")),
Some(code) => Err(anyhow!(RetryableDevnetError(format!(
"Delegating stakes exited with status code: {code}"
)))),
None => Err(anyhow!("Delegating stakes terminated by signal")),
}
}
Expand All @@ -282,9 +291,9 @@ impl Devnet {
.with_context(|| "Error while writing era marker on chain")?;
match status.code() {
Some(0) => Ok(()),
Some(code) => Err(anyhow!(
Some(code) => Err(anyhow!(RetryableDevnetError(format!(
"Write era marker on chain exited with status code: {code}"
)),
)))),
None => Err(anyhow!("Write era marker on chain terminated by signal")),
}
}
Expand All @@ -308,9 +317,9 @@ impl Devnet {
.with_context(|| "Error while to transferring funds on chain")?;
match status.code() {
Some(0) => Ok(()),
Some(code) => Err(anyhow!(
Some(code) => Err(anyhow!(RetryableDevnetError(format!(
"Transfer funds on chain exited with status code: {code}"
)),
)))),
None => Err(anyhow!("Transfer funds on chain terminated by signal")),
}
}
Expand Down
153 changes: 133 additions & 20 deletions mithril-test-lab/mithril-end-to-end/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
use anyhow::{anyhow, Context};
use clap::{CommandFactory, Parser, Subcommand};
use slog::{Drain, Level, Logger};
use slog_scope::{error, info, warn};
use slog_scope::{error, info};
use std::{
fs,
fmt, fs,
path::{Path, PathBuf},
process::{ExitCode, Termination},
sync::Arc,
time::Duration,
};
use thiserror::Error;
use tokio::{
signal::unix::{signal, SignalKind},
sync::Mutex,
Expand All @@ -17,7 +19,8 @@ use tokio::{
use mithril_common::StdResult;
use mithril_doc::GenerateDocCommands;
use mithril_end_to_end::{
Devnet, DevnetBootstrapArgs, MithrilInfrastructure, MithrilInfrastructureConfig, RunOnly, Spec,
Devnet, DevnetBootstrapArgs, MithrilInfrastructure, MithrilInfrastructureConfig,
RetryableDevnetError, RunOnly, Spec,
};

/// Tests args
Expand Down Expand Up @@ -152,8 +155,16 @@ enum EndToEndCommands {
GenerateDoc(GenerateDocCommands),
}

#[tokio::main]
async fn main() -> StdResult<()> {
fn main() -> AppResult {
tokio::runtime::Builder::new_multi_thread()
.enable_all()
.build()
.unwrap()
.block_on(async { main_exec().await })
.into()
}

async fn main_exec() -> StdResult<()> {
let args = Args::parse();
let _guard = slog_scope::set_global_logger(build_logger(&args));

Expand Down Expand Up @@ -198,9 +209,69 @@ async fn main() -> StdResult<()> {

app_stopper.stop().await;
join_set.shutdown().await;

res
}

/// Final outcome of an end-to-end run, used as the return type of `main`.
///
/// It is built from the `StdResult<()>` produced by the run and is mapped to a process
/// exit code by `AppResult::exit_code`.
#[derive(Debug)]
enum AppResult {
/// The run completed without error (exit code `ExitCode::SUCCESS`).
Success(),
/// The run failed with an error that is neither a `RetryableDevnetError` nor a
/// `SignalError` (exit code `ExitCode::FAILURE`).
UnretryableError(anyhow::Error),
/// The run failed with a `RetryableDevnetError` (exit code `2`).
RetryableError(anyhow::Error),
/// The run ended with a `SignalError` (exit code `ExitCode::FAILURE`).
Cancelled(anyhow::Error),
}

impl AppResult {
fn exit_code(&self) -> ExitCode {
match self {
AppResult::Success() => ExitCode::SUCCESS,
AppResult::UnretryableError(_) | AppResult::Cancelled(_) => ExitCode::FAILURE,
AppResult::RetryableError(_) => ExitCode::from(2),
}
}
}

impl fmt::Display for AppResult {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
AppResult::Success() => write!(f, "Success"),
AppResult::UnretryableError(error) => write!(f, "Error(Unretryable): {error:?}"),
AppResult::RetryableError(error) => write!(f, "Error(Retryable): {error:?}"),
AppResult::Cancelled(error) => write!(f, "Cancelled: {error:?}"),
}
}
}

impl Termination for AppResult {
    /// Prints a framed summary of the outcome on stdout and returns the matching exit code.
    fn report(self) -> ExitCode {
        // Horizontal rule: 100 dashes, printed above and below the title.
        let separator = format!("{:-^100}", "");

        println!(" ");
        println!("{separator}");
        println!("Mithril End to End test outcome:");
        println!("{separator}");
        println!("{self}");

        self.exit_code()
    }
}

impl From<StdResult<()>> for AppResult {
fn from(result: StdResult<()>) -> Self {
match result {
Ok(()) => AppResult::Success(),
Err(error) => {
if error.is::<RetryableDevnetError>() {
AppResult::RetryableError(error)
} else if error.is::<SignalError>() {
AppResult::Cancelled(error)
} else {
AppResult::UnretryableError(error)
}
}
}
}
}

struct App {
devnet: Arc<Mutex<Option<Devnet>>>,
infrastructure: Arc<Mutex<Option<MithrilInfrastructure>>>,
Expand Down Expand Up @@ -338,31 +409,73 @@ fn create_workdir_if_not_exist_clean_otherwise(work_dir: &Path) {
fs::create_dir(work_dir).expect("Work dir creation failure");
}

/// Error returned by the signal-listening tasks once a termination signal has been received.
///
/// The wrapped string is the name of the signal (e.g. `"SIGTERM"`).
#[derive(Error, Debug, PartialEq, Eq)]
#[error("Signal received: `{0}`")]
pub struct SignalError(pub String);

/// Spawns three tasks on `join_set`, one per Unix signal: SIGTERM, SIGINT and SIGQUIT.
///
/// Each task creates its signal listener (panicking through `expect` if the listener cannot
/// be created), waits for the signal, and then resolves to an `Err` wrapping a `SignalError`
/// named after that signal.
///
/// NOTE(review): the listener variable is named `sigterm` in all three tasks, including the
/// SIGINT and SIGQUIT ones.
fn with_gracefull_shutdown(join_set: &mut JoinSet<StdResult<()>>) {
// Task waiting for SIGTERM.
join_set.spawn(async move {
let mut sigterm = signal(SignalKind::terminate()).expect("Failed to create SIGTERM signal");
sigterm
.recv()
.await
.ok_or(anyhow!("Failed to receive SIGTERM"))
.inspect(|()| warn!("Received SIGTERM"))
sigterm.recv().await;

Err(anyhow!(SignalError("SIGTERM".to_string())))
});

// Task waiting for SIGINT.
join_set.spawn(async move {
let mut sigterm = signal(SignalKind::interrupt()).expect("Failed to create SIGINT signal");
sigterm
.recv()
.await
.ok_or(anyhow!("Failed to receive SIGINT"))
.inspect(|()| warn!("Received SIGINT"))
sigterm.recv().await;

Err(anyhow!(SignalError("SIGINT".to_string())))
});

// Task waiting for SIGQUIT.
join_set.spawn(async move {
let mut sigterm = signal(SignalKind::quit()).expect("Failed to create SIGQUIT signal");
sigterm
.recv()
.await
.ok_or(anyhow!("Failed to receive SIGQUIT"))
.inspect(|()| warn!("Received SIGQUIT"))
sigterm.recv().await;

Err(anyhow!(SignalError("SIGQUIT".to_string())))
});
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Every `AppResult` variant maps to its expected process exit code.
    #[test]
    fn app_result_exit_code() {
        let cases = [
            (AppResult::Success(), ExitCode::SUCCESS),
            (
                AppResult::UnretryableError(anyhow::anyhow!("an error")),
                ExitCode::FAILURE,
            ),
            (
                AppResult::RetryableError(anyhow::anyhow!("an error")),
                ExitCode::from(2),
            ),
            (
                AppResult::Cancelled(anyhow::anyhow!("an error")),
                ExitCode::FAILURE,
            ),
        ];

        for (app_result, expected_exit_code) in cases {
            let exit_code = app_result.exit_code();
            assert_eq!(expected_exit_code, exit_code);
        }
    }

    /// A `StdResult<()>` converts to the variant matching the type of its error.
    #[test]
    fn app_result_conversion() {
        let success = AppResult::from(Ok(()));
        assert!(matches!(success, AppResult::Success()));

        let retryable_error = anyhow!(RetryableDevnetError("an error".to_string()));
        let retryable = AppResult::from(Err(retryable_error));
        assert!(matches!(retryable, AppResult::RetryableError(_)));

        let unretryable = AppResult::from(Err(anyhow!("an error")));
        assert!(matches!(unretryable, AppResult::UnretryableError(_)));

        let signal_error = anyhow!(SignalError("an error".to_string()));
        let cancelled = AppResult::from(Err(signal_error));
        assert!(matches!(cancelled, AppResult::Cancelled(_)));
    }
}
Loading