From 6481d2309867f00cab7730300c23ad44edb80939 Mon Sep 17 00:00:00 2001 From: William Smith Date: Mon, 17 Apr 2023 10:51:47 -0700 Subject: [PATCH] [CompatibilityChecker] Point sui paths to larger disk partition (#10993) ## Description Also * Fix sharding scheme to create overlap for full coverage * Remove `clean` mode, which doesn't seem to work due to boolean weirdness in github actions * Fix epoch time limit arg * Put both workflows in the same concurrency group so that a new run of one blocks on completion of an ongoing run of any other ## Test Plan Will test after land as the scripts need to be updated in the repo to do a test run --- If your changes are not user-facing and not a breaking change, you can skip the following section. Otherwise, please indicate what changed, and then add to the Release Notes section as highlighted during the release process. ### Type of Change (Check all that apply) - [ ] user-visible impact - [ ] breaking change for a client SDKs - [ ] breaking change for FNs (FN binary must upgrade) - [ ] breaking change for validators or node operators (must upgrade binaries) - [ ] breaking change for on-chain data layout - [ ] necessitate either a data wipe or data migration ### Release notes --- .github/workflows/compatibility_checker.yml | 16 +++---- .github/workflows/compatibility_sharded.yml | 46 ++++++++++----------- scripts/compatibility/fullnode-sync.sh | 6 +-- 3 files changed, 29 insertions(+), 39 deletions(-) diff --git a/.github/workflows/compatibility_checker.yml b/.github/workflows/compatibility_checker.yml index ab575980dc5c7..483887f16ac61 100644 --- a/.github/workflows/compatibility_checker.yml +++ b/.github/workflows/compatibility_checker.yml @@ -1,7 +1,7 @@ name: Compatibility Checker concurrency: - group: ${{ github.workflow }} + group: compatibility-checker-cluster on: schedule: @@ -13,11 +13,6 @@ on: required: false description: "Run with verbose logging" default: false - preserve-state: - type: boolean - required: false - 
description: "Preserve database state of previous run" - default: true jobs: genesis-sync-test: @@ -56,9 +51,8 @@ jobs: # Wipe database from previous runs - name: Wipe SuiDB - if: ${{ github.event.inputs.preserve-state == false }} run: | - tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 5 ssh ubuntu@fullnode-compat-test-01 "sudo rm -rf /var/lib/sui/suidb || true" + tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 5 ssh ubuntu@fullnode-compat-test-01 "sudo rm -rf /opt/sui/suidb || true" # Checkout the latest sui repo - name: Checkout sui repo @@ -72,11 +66,11 @@ jobs: - name: Fetch sui-node binary run: | - tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 30 ssh ubuntu@fullnode-compat-test-01 "mkdir -p /var/lib/sui/bin && cd /var/lib/sui/bin && wget https://sui-releases.s3.us-east-1.amazonaws.com/${{ steps.get-ci-tag.outputs.CI_COMMIT_HASH }}/sui-node && chmod +x sui-node" + tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 30 ssh ubuntu@fullnode-compat-test-01 "rm -rf /opt/sui/bin && mkdir -p /opt/sui/bin && cd /opt/sui/bin && wget https://sui-releases.s3.us-east-1.amazonaws.com/${{ steps.get-ci-tag.outputs.CI_COMMIT_HASH }}/sui-node && chmod +x sui-node" - name: Run sync script run: | - tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 720 ssh ubuntu@fullnode-compat-test-01 "cd ~/sui && echo "Running compatibility checker against binary built at commit hash ${{ steps.get-ci-tag.outputs.CI_COMMIT_HASH }}" && CARGO_TERM_COLOR=always ./scripts/compatibility/fullnode-sync.sh -p /var/lib/sui/bin/sui-node -n testnet ${{ github.event.inputs.verbose == true && '-v' || '' }}" + tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 720 ssh ubuntu@fullnode-compat-test-01 "cd ~/sui && echo "Running compatibility checker against binary built at commit hash ${{ steps.get-ci-tag.outputs.CI_COMMIT_HASH }}" && CARGO_TERM_COLOR=always ./scripts/compatibility/fullnode-sync.sh -p /opt/sui/bin/sui-node -n testnet ${{ github.event.inputs.verbose == 'true' && '-v' || '' 
}}" notify: name: Notify @@ -140,7 +134,7 @@ jobs: "type": "section", "text": { "type": "mrkdwn", - "text": "To debug failures: See run logs; Rerun with `verbose` logging enabled, `clean` disabled in order to sync from last place); `tsh ssh ubuntu@fullnode-compat-test-01` if needed. \nMetrics can be viewed at https://metrics.sui.io/d/_nu1mWC7zx/sui-fullnode?orgId=1&refresh=1m&var-Environment=mysten-metrics-internal&var-network=testnet&var-host=fullnode-compat-test-01" + "text": "To debug failures: See run logs; Rerun with `verbose` logging enabled; `tsh ssh ubuntu@fullnode-compat-test-01` if needed. \nMetrics can be viewed at https://metrics.sui.io/d/_nu1mWC7zx/sui-fullnode?orgId=1&refresh=1m&var-Environment=mysten-metrics-internal&var-network=testnet&var-host=fullnode-compat-test-01" } } ] diff --git a/.github/workflows/compatibility_sharded.yml b/.github/workflows/compatibility_sharded.yml index cff0dc103fecc..412059f23a623 100644 --- a/.github/workflows/compatibility_sharded.yml +++ b/.github/workflows/compatibility_sharded.yml @@ -1,7 +1,7 @@ name: Compatibility Checker (Sharded) concurrency: - group: ${{ github.workflow }} + group: compatibility-checker-cluster on: schedule: @@ -13,27 +13,24 @@ on: required: false description: "Run with verbose logging" default: false - preserve-state: - type: boolean - required: false - description: "Preserve database state of previous run" - default: true jobs: genesis-sync-test-sharded: strategy: matrix: - shard_manifest: - [ + shard_manifest: [ + # we need shards to overlap by one epoch for full coverage, as snapshot N + # does not start at the beginning of epoch N, but rather at the end of + # epoch N, and immediately reconfigures to N+1. 
{ hostname: "fullnode-compat-test-01", start_epoch: 0, - end_epoch: 748, + end_epoch: 749, }, { hostname: "fullnode-compat-test-02", start_epoch: 748, - end_epoch: 753, + end_epoch: 754, }, { hostname: "fullnode-compat-test-03", @@ -86,42 +83,41 @@ jobs: - name: Fetch sui-node binary run: | - tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 30 ssh ubuntu@${{ matrix.shard_manifest.hostname }} "source ~/.bashrc && mkdir -p /var/lib/sui/bin && cd /var/lib/sui/bin && wget https://sui-releases.s3.us-east-1.amazonaws.com/${{ steps.get-ci-tag.outputs.CI_COMMIT_HASH }}/sui-node && chmod +x sui-node" + tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 30 ssh ubuntu@${{ matrix.shard_manifest.hostname }} "source ~/.bashrc && rm -rf /opt/sui/bin && mkdir -p /opt/sui/bin && cd /opt/sui/bin && wget https://sui-releases.s3.us-east-1.amazonaws.com/${{ steps.get-ci-tag.outputs.CI_COMMIT_HASH }}/sui-node && chmod +x sui-node" # Wipe database from previous runs - name: Wipe SuiDB - if: ${{ github.event.inputs.preserve-state == false }} run: | - tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 5 ssh ubuntu@${{ matrix.shard_manifest.hostname }} "source ~/.bashrc && sudo rm -rf /var/lib/sui/suidb || true" + tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 5 ssh ubuntu@${{ matrix.shard_manifest.hostname }} "source ~/.bashrc && sudo rm -rf /opt/sui/suidb || true" - name: Setup DB structure - if: ${{ (matrix.shard_manifest.start_epoch != 0) && (github.event.inputs.preserve-state == false) }} + if: ${{ (matrix.shard_manifest.start_epoch != 0) }} run: | - tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 5 ssh ubuntu@${{ matrix.shard_manifest.hostname }} "source ~/.bashrc && mkdir -p /var/lib/sui/suidb/epoch_${{ matrix.shard_manifest.start_epoch }} && cd /var/lib/sui/suidb/epoch_${{ matrix.shard_manifest.start_epoch }} && mkdir checkpoints && mkdir epochs && mkdir -p store/perpetual" + tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 5 ssh ubuntu@${{ 
matrix.shard_manifest.hostname }} "source ~/.bashrc && mkdir -p /opt/sui/suidb/epoch_${{ matrix.shard_manifest.start_epoch }} && cd /opt/sui/suidb/epoch_${{ matrix.shard_manifest.start_epoch }} && mkdir checkpoints && mkdir epochs && mkdir -p store/perpetual" - name: Download epoch snapshot artifacts - if: ${{ (matrix.shard_manifest.start_epoch != 0) && (github.event.inputs.preserve-state == false) }} + if: ${{ (matrix.shard_manifest.start_epoch != 0) }} run: | - tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 60 ssh ubuntu@${{ matrix.shard_manifest.hostname }} "source ~/.bashrc && cd /var/lib/sui/suidb/epoch_${{ matrix.shard_manifest.start_epoch }}/epochs && wget -i /var/lib/sui/s3_urls/epochs" + tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 60 ssh ubuntu@${{ matrix.shard_manifest.hostname }} "source ~/.bashrc && cd /opt/sui/suidb/epoch_${{ matrix.shard_manifest.start_epoch }}/epochs && wget -i /opt/sui/s3_urls/epochs" - name: Download checkpoint snapshot artifacts - if: ${{ (matrix.shard_manifest.start_epoch != 0) && (github.event.inputs.preserve-state == false) }} + if: ${{ (matrix.shard_manifest.start_epoch != 0) }} run: | - tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 60 ssh ubuntu@${{ matrix.shard_manifest.hostname }} "source ~/.bashrc && cd /var/lib/sui/suidb/epoch_${{ matrix.shard_manifest.start_epoch }}/checkpoints && wget -i /var/lib/sui/s3_urls/checkpoints" + tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 60 ssh ubuntu@${{ matrix.shard_manifest.hostname }} "source ~/.bashrc && cd /opt/sui/suidb/epoch_${{ matrix.shard_manifest.start_epoch }}/checkpoints && wget -i /opt/sui/s3_urls/checkpoints" - name: Download perpetual snapshot artifacts - if: ${{ (matrix.shard_manifest.start_epoch != 0) && (github.event.inputs.preserve-state == false) }} + if: ${{ (matrix.shard_manifest.start_epoch != 0) }} run: | - tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 60 ssh ubuntu@${{ matrix.shard_manifest.hostname }} "source ~/.bashrc && cd 
/var/lib/sui/suidb/epoch_${{ matrix.shard_manifest.start_epoch }}/store/perpetual && wget -i /var/lib/sui/s3_urls/store_perpetual" + tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 60 ssh ubuntu@${{ matrix.shard_manifest.hostname }} "source ~/.bashrc && cd /opt/sui/suidb/epoch_${{ matrix.shard_manifest.start_epoch }}/store/perpetual && wget -i /opt/sui/s3_urls/store_perpetual" - name: Mark snapshot as live - if: ${{ (matrix.shard_manifest.start_epoch != 0) && (github.event.inputs.preserve-state == false) }} + if: ${{ (matrix.shard_manifest.start_epoch != 0) }} run: | - tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 5 ssh ubuntu@${{ matrix.shard_manifest.hostname }} "source ~/.bashrc && mv /var/lib/sui/suidb/epoch_${{ matrix.shard_manifest.start_epoch }} /var/lib/sui/suidb/live" + tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 5 ssh ubuntu@${{ matrix.shard_manifest.hostname }} "source ~/.bashrc && mv /opt/sui/suidb/epoch_${{ matrix.shard_manifest.start_epoch }} /opt/sui/suidb/live" - name: Run sync script run: | - tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 720 ssh ubuntu@${{ matrix.shard_manifest.hostname }} "source ~/.bashrc && source ~/.cargo/env && cd ~/sui && echo "Running compatibility checker against binary built at commit hash ${{ steps.get-ci-tag.outputs.CI_COMMIT_HASH }}" && CARGO_TERM_COLOR=always ./scripts/compatibility/fullnode-sync.sh -p /var/lib/sui/bin/sui-node -n testnet -e ${{ matrix.shard_manifest.end_epoch }} ${{ github.event.inputs.verbose == true && '-v' || '' }}" + tsh -i ${{ steps.auth.outputs.identity-file }} --ttl 720 ssh ubuntu@${{ matrix.shard_manifest.hostname }} "source ~/.bashrc && source ~/.cargo/env && cd ~/sui && echo "Running compatibility checker against binary built at commit hash ${{ steps.get-ci-tag.outputs.CI_COMMIT_HASH }}" && CARGO_TERM_COLOR=always ./scripts/compatibility/fullnode-sync.sh -p /opt/sui/bin/sui-node -n testnet -e ${{ matrix.shard_manifest.end_epoch }} ${{ github.event.inputs.verbose 
== 'true' && '-v' || '' }}" notify: name: Notify @@ -185,7 +181,7 @@ jobs: "type": "section", "text": { "type": "mrkdwn", - "text": "To debug failures: See run logs; Rerun with `verbose` logging enabled, `clean` disabled in order to sync from last place); `tsh ssh ubuntu@fullnode-compat-test-01` if needed. \nMetrics can be viewed at https://metrics.sui.io/d/_nu1mWC7zx/sui-fullnode?orgId=1&refresh=1m&var-Environment=mysten-metrics-internal&var-network=testnet&var-host=fullnode-compat-test-01" + "text": "To debug failures: See run logs; Rerun with `verbose` logging enabled; `tsh ssh ubuntu@fullnode-compat-test-01` if needed (or -02, -03). \nMetrics can be viewed at https://metrics.sui.io/d/_nu1mWC7zx/sui-fullnode?orgId=1&refresh=1m&var-Environment=mysten-metrics-internal&var-network=testnet&var-host=fullnode-compat-test-01" } } ] diff --git a/scripts/compatibility/fullnode-sync.sh b/scripts/compatibility/fullnode-sync.sh index 1769445511e84..b8b633893a00e 100755 --- a/scripts/compatibility/fullnode-sync.sh +++ b/scripts/compatibility/fullnode-sync.sh @@ -7,7 +7,7 @@ set -e DEFAULT_NETWORK="testnet" CLEAN=0 LOG_LEVEL="info" -SUI_RUN_PATH="/var/lib/sui" +SUI_RUN_PATH="/opt/sui" VERBOSE="" function cleanup { @@ -17,7 +17,7 @@ function cleanup { trap cleanup EXIT -while getopts "hvn:e:p:" OPT; do +while getopts "hvn:e:p:t:" OPT; do case $OPT in p) SUI_BIN_PATH=$OPTARG ;; @@ -74,7 +74,7 @@ if [[ ! -f "${SUI_RUN_PATH}/fullnode.yaml" ]]; then sed -i "s|suidb|${SUI_RUN_PATH}/suidb|g" ${SUI_RUN_PATH}/fullnode.yaml if [[ $NETWORK != "devnet" ]]; then - cat >> "$SUI_RUN_PATH/fullnode.yaml" <<- EOM + cat >> "${SUI_RUN_PATH}/fullnode.yaml" <<- EOM p2p-config: seed-peers: