From d1933ab72f09b068dfd20780f81c77db4cbba007 Mon Sep 17 00:00:00 2001
From: Jason Nochlin <91577+hundredwatt@users.noreply.github.com>
Date: Sat, 13 Jan 2024 04:19:29 -0700
Subject: [PATCH] evaluate2.sh: Check output of warmup run and abort early if failed (#333)

* refactor: replace xtrace with "print_and_execute" function

* nit: stylize error messages

* replace out_expected.txt with measurements_1B.out

* print

* prevent errors on cleanup

* run tests and check warmup run output before running benchmark

* move "git diff" pretty diff output to test.sh

* Ensure "set -e" is re-enabled if we followed a "continue" branch

* add timeouts to test.sh invocations

* use diff with tocsv.sh to show differences on failed test

* add --quiet mode to test.sh

* move prepare_$fork.sh invocation to right below hyperfine since test.sh also invokes it

* Revert "add --quiet mode to test.sh"

This reverts commit 13e9fb7f395c1bd64a62528b8349803bc1366941.

* use tee to capture test output to a temp file and print contents on failure

---------

Co-authored-by: Jason Nochlin
---
 evaluate2.sh | 137 ++++++++++++++++++++++++++++++---------------------
 test.sh      |   2 +-
 2 files changed, 83 insertions(+), 56 deletions(-)

diff --git a/evaluate2.sh b/evaluate2.sh
index 04eebb6332..8a9e42f875 100755
--- a/evaluate2.sh
+++ b/evaluate2.sh
@@ -33,9 +33,22 @@ RED='\033[0;31m'
 BOLD_YELLOW='\033[1;33m'
 RESET='\033[0m' # No Color
 
+MEASUREMENTS_FILE="measurements_1B.txt"
+RUNS=5
 DEFAULT_JAVA_VERSION="21.0.1-open"
 RUN_TIME_LIMIT=300 # seconds
 
+TIMEOUT=""
+if [ "$(uname -s)" == "Linux" ]; then
+  TIMEOUT="timeout -v $RUN_TIME_LIMIT"
+else # MacOs
+  if [ -x "$(command -v gtimeout)" ]; then
+    TIMEOUT="gtimeout -v $RUN_TIME_LIMIT" # from `brew install coreutils`
+  else
+    echo -e "${BOLD_YELLOW}WARNING${RESET} gtimeout not available, benchmark runs may take indefinitely long."
+  fi
+fi
+
 function check_command_installed {
   if ! [ -x "$(command -v $1)" ]; then
     echo "Error: $1 is not installed." >&2
@@ -43,6 +56,11 @@ function check_command_installed {
   fi
 }
 
+function print_and_execute() {
+  echo "+ $@" >&2
+  "$@"
+}
+
 check_command_installed java
 check_command_installed hyperfine
 check_command_installed jq
@@ -51,7 +69,7 @@ check_command_installed bc
 # Validate that ./calculate_average_<fork>.sh exists for each fork
 for fork in "$@"; do
   if [ ! -f "./calculate_average_$fork.sh" ]; then
-    echo "Error: ./calculate_average_$fork.sh does not exist." >&2
+    echo -e "${BOLD_RED}ERROR${RESET}: ./calculate_average_$fork.sh does not exist." >&2
     exit 1
   fi
 done
@@ -59,7 +77,7 @@ done
 ## SDKMAN Setup
 # 1. Custom check for sdkman installed; not sure why check_command_installed doesn't detect it properly
 if [ ! -f "$HOME/.sdkman/bin/sdkman-init.sh" ]; then
-  echo "Error: sdkman is not installed." >&2
+  echo -e "${BOLD_RED}ERROR${RESET}: sdkman is not installed." >&2
   exit 1
 fi
 
@@ -68,8 +86,7 @@ source "$HOME/.sdkman/bin/sdkman-init.sh"
 
 # 3. make sure the default java version is installed
 if [ ! -d "$HOME/.sdkman/candidates/java/$DEFAULT_JAVA_VERSION" ]; then
-  echo "+ sdk install java $DEFAULT_JAVA_VERSION"
-  sdk install java $DEFAULT_JAVA_VERSION
+  print_and_execute sdk install java $DEFAULT_JAVA_VERSION
 fi
 
 # 4. Install missing SDK java versions in any of the prepare_*.sh scripts for the provided forks
@@ -77,8 +94,7 @@ for fork in "$@"; do
   if [ -f "./prepare_$fork.sh" ]; then
     grep -h "^sdk use" "./prepare_$fork.sh" | cut -d' ' -f4 | while read -r version; do
       if [ ! -d "$HOME/.sdkman/candidates/java/$version" ]; then
-        echo "+ sdk install java $version"
-        sdk install java $version
+        print_and_execute sdk install java $version
       fi
     done || true # grep returns exit code 1 when no match, `|| true` prevents the script from exiting early
   fi
@@ -99,44 +115,68 @@ if [ -f "/sys/devices/system/cpu/cpufreq/boost" ]; then
   fi
 fi
 
-set -o xtrace
+print_and_execute java --version
+print_and_execute ./mvnw --quiet clean verify
 
-java --version
-
-./mvnw --quiet clean verify
-
-rm -f measurements.txt
-ln -s measurements_1B.txt measurements.txt
-
-set +o xtrace
+print_and_execute rm -f measurements.txt
+print_and_execute ln -s $MEASUREMENTS_FILE measurements.txt
 
 echo ""
 
-# check if out_expected.txt exists
-if [ ! -f "out_expected.txt" ]; then
-  echo "Error: out_expected.txt does not exist." >&2
+# check if measurements_xxx.out exists
+if [ ! -f "${MEASUREMENTS_FILE%.txt}.out" ]; then
+  echo -e "${BOLD_RED}ERROR${RESET}: ${MEASUREMENTS_FILE%.txt}.out does not exist." >&2
   echo "Please create it with:"
-  echo "  ./calculate_average_baseline.sh > out_expected.txt"
+  echo ""
+  echo "  ./calculate_average_baseline.sh > ${MEASUREMENTS_FILE%.txt}.out"
+  echo ""
   exit 1
 fi
 
-# Prepare commands for running benchmarks for each of the forks
+# Run tests and benchmark for each fork
 filetimestamp=$(date +"%Y%m%d%H%M%S")  # same for all fork.out files from this run
 failed=()
+test_output=$(mktemp)
 for fork in "$@"; do
-  # Use prepare script to invoke SDKMAN
+  set +e # we don't want prepare.sh, test.sh or hyperfine failing on 1 fork to exit the script early
+
+  # Run the test suite
+  print_and_execute $TIMEOUT ./test.sh $fork | tee $test_output > /dev/null 2>&1
+  if [ $? -ne 0 ]; then
+    failed+=("$fork")
+    echo ""
+    echo -e "${BOLD_RED}FAILURE${RESET}: ./test.sh $fork failed"
+    cat $test_output
+    echo ""
+
+    continue
+  fi
+
+  # Run the test on $MEASUREMENTS_FILE; this serves as the warmup
+  print_and_execute $TIMEOUT ./test.sh $fork $MEASUREMENTS_FILE | tee $test_output > /dev/null 2>&1
+  if [ $? -ne 0 ]; then
+    failed+=("$fork")
+    echo ""
+    echo -e "${BOLD_RED}FAILURE${RESET}: ./test.sh $fork $MEASUREMENTS_FILE failed"
+    cat $test_output
+    echo ""
+
+    continue
+  fi
+
+  # re-link measurements.txt since test.sh deleted it
+  print_and_execute rm -f measurements.txt
+  print_and_execute ln -s $MEASUREMENTS_FILE measurements.txt
+
+  # Run prepare script
   if [ -f "./prepare_$fork.sh" ]; then
-    echo "+ source ./prepare_$fork.sh"
-    source "./prepare_$fork.sh"
+    print_and_execute source "./prepare_$fork.sh"
   else
-    echo "+ sdk use java $DEFAULT_JAVA_VERSION"
-    sdk use java $DEFAULT_JAVA_VERSION
+    print_and_execute sdk use java $DEFAULT_JAVA_VERSION
   fi
 
-  # Use hyperfine to run the benchmarks for each fork
-  HYPERFINE_OPTS="--warmup 1 --runs 5 --export-json $fork-$filetimestamp-timing.json --output ./$fork-$filetimestamp.out"
-
-  set +e # we don't want hyperfine or diff failing on 1 fork to exit the script early
+  # Use hyperfine to run the benchmark for each fork
+  HYPERFINE_OPTS="--warmup 0 --runs $RUNS --export-json $fork-$filetimestamp-timing.json --output ./$fork-$filetimestamp.out"
 
   # check if this script is running on a Linux box
   if [ "$(uname -s)" == "Linux" ]; then
@@ -144,36 +184,20 @@ for fork in "$@"; do
     # Linux platform
     # prepend this with numactl --physcpubind=0-7 for running it only with 8 cores
-    numactl --physcpubind=0-7 hyperfine $HYPERFINE_OPTS "timeout -v $RUN_TIME_LIMIT ./calculate_average_$fork.sh 2>&1"
+    numactl --physcpubind=0-7 hyperfine $HYPERFINE_OPTS "$TIMEOUT ./calculate_average_$fork.sh 2>&1"
   else # MacOS
-    timeout=""
-    if [ -x "$(command -v gtimeout)" ]; then
-      timeout="gtimeout -v $RUN_TIME_LIMIT" # from `brew install coreutils`
-    else
-      echo -e "${BOLD_YELLOW}WARNING${RESET} gtimeout not available, benchmark runs may take indefinitely long."
-    fi
-    hyperfine $HYPERFINE_OPTS "$timeout ./calculate_average_$fork.sh 2>&1"
+    hyperfine $HYPERFINE_OPTS "$TIMEOUT ./calculate_average_$fork.sh 2>&1"
   fi
 
   # Catch hyperfine command failed
   if [ $? -ne 0 ]; then
     failed+=("$fork")
+    # Hyperfine already prints the error message
     echo ""
+    continue
   fi
-
-  # Verify output
-  diff <(grep Hamburg $fork-$filetimestamp.out) <(grep Hamburg out_expected.txt) > /dev/null
-  if [ $? -ne 0 ]; then
-    echo ""
-    echo -e "${BOLD_RED}FAILURE${RESET}: output of ${BOLD_WHITE}$fork-$filetimestamp.out${RESET} does not match ${BOLD_WHITE}out_expected.txt${RESET}"
-    echo ""
-
-    git diff --no-index --word-diff out_expected.txt $fork-$filetimestamp.out
-
-    # add $fork to $failed array
-    failed+=("$fork")
-  fi
-  set -e
 done
+set -e
+rm $test_output
 
 # Summary
 echo -e "${BOLD_WHITE}Summary${RESET}"
@@ -285,9 +309,12 @@ rm $leaderboard_temp_file
 # Finalize .out files
 echo "Raw results saved to file(s):"
 for fork in "$@"; do
-  # Append $fork-$filetimestamp-timing.json to $fork-$filetimestamp.out and rm $fork-$filetimestamp-timing.json
-  cat $fork-$filetimestamp-timing.json >> $fork-$filetimestamp.out
-  rm $fork-$filetimestamp-timing.json
+  if [ -f "$fork-$filetimestamp-timing.json" ]; then
+    cat $fork-$filetimestamp-timing.json >> $fork-$filetimestamp.out
+    rm $fork-$filetimestamp-timing.json
+  fi
 
-  echo "  $fork-$filetimestamp.out"
+  if [ -f "$fork-$filetimestamp.out" ]; then
+    echo "  $fork-$filetimestamp.out"
+  fi
 done
diff --git a/test.sh b/test.sh
index c45ca755c7..faf85f4df9 100755
--- a/test.sh
+++ b/test.sh
@@ -45,7 +45,7 @@ for sample in $(ls $INPUT); do
   rm -f measurements.txt
   ln -s $sample measurements.txt
 
-  diff <("./calculate_average_$FORK.sh") ${sample%.txt}.out
+  diff <("./calculate_average_$FORK.sh" | ./tocsv.sh) <(./tocsv.sh < ${sample%.txt}.out)
 done
 
 rm measurements.txt
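
Note: the central pattern this patch introduces is "run the test/warmup, capture its output to a temp file via tee, and print that output only if the run failed, then continue to the next fork". Below is a minimal standalone sketch of that pattern; the command name ./run_fork.sh and the use of pipefail are illustrative assumptions, not taken from the repository scripts.

    #!/usr/bin/env bash
    # Sketch: capture a command's combined output to a temp file and show it only on failure.
    # ./run_fork.sh stands in for whichever fork script is under test (assumption).
    # pipefail makes the pipeline's exit status reflect the tested command rather than tee.
    set -o pipefail

    test_output=$(mktemp)

    if ! ./run_fork.sh 2>&1 | tee "$test_output" > /dev/null; then
        echo "FAILURE: ./run_fork.sh failed; captured output follows:"
        cat "$test_output"
    fi

    rm "$test_output"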