Add script to search for failing seeds (MystenLabs#17364)

Usage and output:

```
; ./scripts/simtest/seed-search.py simtest --test test_simulated_load_restarts
Found binary: /Users/marklogan/dev/sui/target/simulator/deps/simtest-a3d94d04483ce652
running seed: 3145
running seed: 3146
running seed: 3147
running seed: 3149
running seed: 3148
running seed: 3150
running seed: 3151
running seed: 3152
running seed: 3153
running seed: 3154
running seed: 3155
running seed: 3156
running seed: 3157
running seed: 3158
running seed: 3159
running seed: 3160
running seed: 3161
running seed: 3162
running seed: 3163
running seed: 3164
Command '/Users/marklogan/dev/sui/target/simulator/deps/simtest-a3d94d04483ce652 test_simulated_load_restarts' failed with exit code 101 for seed: 3146
stdout:
==========================
running 1 test
test test::test_simulated_load_restarts has been running for over 60 seconds
test test::test_simulated_load_restarts ... FAILED
failures:
---- test::test_simulated_load_restarts stdout ----
starting test iteration 0 with seed 3146
Keys saved as Base64 with 33 bytes `flag || privkey` ($BASE64_STR). To see Bech32 format encoding, use `sui keytool export $SUI_ADDRESS` where $SUI_ADDRESS can be found with `sui keytool list`. Or use `sui keytool convert $BASE64_STR`.
Keys saved as Base64 with 33 bytes `flag || privkey` ($BASE64_STR). To see Bech32 format encoding, use `sui keytool export $SUI_ADDRESS` where $SUI_ADDRESS can be found with `sui keytool list`. Or use `sui keytool convert $BASE64_STR`.
Keys saved as Base64 with 33 bytes `flag || privkey` ($BASE64_STR). To see Bech32 format encoding, use `sui keytool export $SUI_ADDRESS` where $SUI_ADDRESS can be found with `sui keytool list`. Or use `sui keytool convert $BASE64_STR`.
Keys saved as Base64 with 33 bytes `flag || privkey` ($BASE64_STR). To see Bech32 format encoding, use `sui keytool export $SUI_ADDRESS` where $SUI_ADDRESS can be found with `sui keytool list`. Or use `sui keytool convert $BASE64_STR`.
Keys saved as Base64 with 33 bytes `flag || privkey` ($BASE64_STR). To see Bech32 format encoding, use `sui keytool export $SUI_ADDRESS` where $SUI_ADDRESS can be found with `sui keytool list`. Or use `sui keytool convert $BASE64_STR`.
thread '<unnamed>' panicked at /Users/marklogan/dev/sui/crates/sui-core/src/checkpoints/checkpoint_executor/mod.rs:312:29:
No new synced checkpoints received for 20s
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
note: run with `MSIM_TEST_SEED=3146` environment variable to reproduce this error
failures:
test::test_simulated_load_restarts
test result: FAILED. 0 passed; 1 failed; 0 ignored; 0 measured; 13 filtered out; finished in 64.88s
==========================
running seed: 3165
running seed: 3166
running seed: 3167
running seed: 3168
running seed: 3169
running seed: 3170
running seed: 3171
running seed: 3172
running seed: 3173
running seed: 3174
```
BoilingOil · Apr 26, 2024 · 4ec0f3d · 4ec0f3d
1 parent 4f46d56
commit 4ec0f3d
Showing 1 changed file with 103 additions and 0 deletions.
diff --git a/scripts/simtest/seed-search.py b/scripts/simtest/seed-search.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+# Copyright (c) Mysten Labs, Inc.
+# SPDX-License-Identifier: Apache-2.0
+
+import subprocess
+import concurrent.futures
+import sys
+import os
+import random
+import argparse
+
+parser = argparse.ArgumentParser(description='Run the simulator with different seeds')
+parser.add_argument('binary', type=str, help='Name of simulator binary, or full path to binary')
+parser.add_argument('--test', type=str, help='Name of the test to run', required=True)
+parser.add_argument('--num-seeds', type=int, help='Number of seeds to run', default=200)
+parser.add_argument('--seed-start', type=int, help='Starting seed value', default=random.randint(0, 10000))
+parser.add_argument('--concurrency', type=int, help='Number of concurrent tests to run', default=os.cpu_count())
+parser.add_argument('--no-build', type=bool, help='Skip building the test binary', default=False)
+args = parser.parse_args()
+
+def run_command(command, env_vars):
+    """Run a single command using subprocess with specific environment variables."""
+    try:
+        # Merge the new environment variables with the current environment
+        env = os.environ.copy()
+        env.update(env_vars)
+
+        print("running seed: " + env_vars["MSIM_TEST_SEED"])
+        process = subprocess.Popen(command, shell=True, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE, preexec_fn=os.setsid)
+        stdout, stderr = process.communicate()
+        exit_code = process.returncode
+        if exit_code != 0:
+            print(f"Command '{command}' failed with exit code {exit_code} for seed: " + env_vars["MSIM_TEST_SEED"])
+            print(f"stdout:\n=========================={stdout.decode('utf-8')}\n==========================")
+            if stderr:
+              print(f"stderr:\n=========================={stderr.decode('utf-8')}\n==========================")
+        else:
+          print("-- seed passed %s" % env_vars["MSIM_TEST_SEED"])
+
+        return 0
+    except subprocess.CalledProcessError as e:
+        print(f"Command '{e.cmd}' failed with exit code {e.returncode} for seed: " + env_vars["MSIM_TEST_SEED"])
+        return e.returncode
+
+def main(commands):
+    """Execute a list of commands with specific environment variables and a concurrency limit of 20."""
+    with concurrent.futures.ThreadPoolExecutor(max_workers=args.concurrency) as executor:
+        # Start the subprocesses
+        future_to_command = {}
+        for cmd, env_vars in commands:
+            future = executor.submit(run_command, cmd, env_vars)
+            future_to_command[future] = cmd
+
+        for future in concurrent.futures.as_completed(future_to_command):
+            cmd = future_to_command[future]
+            exit_code = future.result()
+            if exit_code != 0:
+                print(f"Command '{cmd}' failed with exit code {exit_code}")
+                sys.exit(1)
+
+if __name__ == "__main__":
+    repo_root = subprocess.check_output(["git", "rev-parse", "--show-toplevel"]).decode("utf-8").strip()
+
+    if not args.no_build:
+        os.chdir(repo_root)
+        subprocess.run(["cargo", "simtest", "build", "--test", args.binary], check=True)
+
+    # if binary contains no slashes, search for it in <repo_root>/target/simulator/deps/
+    # otherwise, use the pathname as is
+    if "/" not in args.binary:
+        binary = os.path.join(repo_root, "target/simulator/deps", args.binary)
+        # binary is a prefix of some test file, find the most recent one that matches the prefix
+        if not os.path.isfile(binary):
+            path = os.path.join(repo_root, "target/simulator/deps", args.binary + "*")
+            binary = subprocess.getstatusoutput(f"ls -ltr {path} | tail -n 1")[1].split()[-1]
+            print(f"Found binary: {binary}")
+
+    # check that binary is an executable file
+    if not os.path.isfile(binary) or not os.access(binary, os.X_OK):
+        print(f"Error: {args.binary} is not an executable file")
+        print(f"run: `$ ls -ltr target/simulator/deps/ | tail` to find recent test binaries");
+        sys.exit(1)
+
+    commands = []
+
+    for i in range(1, args.num_seeds + 1):
+        next_seed = args.seed_start + i
+        commands.append(("%s %s" % (binary, args.test), {
+          "MSIM_TEST_SEED": "%d" % next_seed,
+          "RUST_LOG": "off",
+        }))
+
+    # register clean up code to kill all child processes when we exit
+    import atexit
+    import signal
+    def kill_child_processes(*args):
+        print("Killing child processes")
+        os.killpg(0, signal.SIGKILL)
+        sys.exit(0)
+    atexit.register(kill_child_processes)
+    signal.signal(signal.SIGINT, kill_child_processes)
+
+    main(commands)