Skip to content

Commit 7d7e1fe

Browse files
Add optional step to archive post-reexecution state to S3 (#4172)
Signed-off-by: aaronbuchwald <aaron.buchwald56@gmail.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent ebe0558 commit 7d7e1fe

File tree

6 files changed

+102
-36
lines changed

6 files changed

+102
-36
lines changed

.github/actions/c-chain-reexecution-benchmark/action.yml

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ inputs:
1010
default: '250000'
1111
source-block-dir:
1212
description: 'The source block directory. Supports S3 directory/zip and local directories.'
13-
default: 's3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb.zip'
13+
default: 's3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb/**'
1414
current-state-dir:
1515
description: 'The current state directory. Supports S3 directory/zip and local directories.'
16-
default: 's3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100.zip'
16+
default: 's3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/**'
1717
aws-role:
1818
description: 'AWS role to assume for S3 access.'
1919
required: true
@@ -43,6 +43,9 @@ inputs:
4343
description: 'Whether to push the benchmark result to GitHub.'
4444
required: true
4545
default: false
46+
push-post-state:
47+
description: 'S3 destination to copy the current-state directory after completing re-execution. If empty, this will be skipped.'
48+
default: ''
4649

4750
runs:
4851
using: composite
@@ -53,7 +56,7 @@ runs:
5356
run: |
5457
{
5558
echo "EXECUTION_DATA_DIR=${{ inputs.workspace }}/reexecution-data"
56-
echo "BENCHMARK_OUTPUT_FILE=${{ inputs.workspace }}/reexecute-cchain-range-benchmark-res.txt"
59+
echo "BENCHMARK_OUTPUT_FILE=output.txt"
5760
echo "START_BLOCK=${{ inputs.start-block }}"
5861
echo "END_BLOCK=${{ inputs.end-block }}"
5962
echo "SOURCE_BLOCK_DIR=${{ inputs.source-block-dir }}"
@@ -67,7 +70,15 @@ runs:
6770
- name: Run C-Chain Re-Execution
6871
uses: ./.github/actions/run-monitored-tmpnet-cmd
6972
with:
70-
run: ./scripts/run_task.sh reexecute-cchain-range-with-copied-data
73+
run: |
74+
./scripts/run_task.sh reexecute-cchain-range-with-copied-data \
75+
EXECUTION_DATA_DIR=${{ env.EXECUTION_DATA_DIR }} \
76+
SOURCE_BLOCK_DIR=${{ env.SOURCE_BLOCK_DIR }} \
77+
CURRENT_STATE_DIR=${{ env.CURRENT_STATE_DIR }} \
78+
START_BLOCK=${{ env.START_BLOCK }} \
79+
END_BLOCK=${{ env.END_BLOCK }} \
80+
LABELS=${{ env.LABELS }} \
81+
BENCHMARK_OUTPUT_FILE=${{ env.BENCHMARK_OUTPUT_FILE }}
7182
prometheus_push_url: ${{ inputs.prometheus-push-url }}
7283
prometheus_username: ${{ inputs.prometheus-username }}
7384
prometheus_password: ${{ inputs.prometheus-password }}
@@ -100,3 +111,10 @@ runs:
100111
output-file-path: ${{ env.BENCHMARK_OUTPUT_FILE }}
101112
github-token: ${{ inputs.github-token }}
102113
auto-push: ${{ inputs.push-github-action-benchmark }}
114+
115+
- uses: ./.github/actions/install-nix
116+
if: ${{ inputs.push-post-state != '' }}
117+
- name: Push Post-State to S3 (if not exists)
118+
if: ${{ inputs.push-post-state != '' }}
119+
shell: nix develop --command bash -x {0}
120+
run: ./scripts/run_task.sh export-dir-to-s3 LOCAL_SRC=${{ env.EXECUTION_DATA_DIR }}/current-state/ S3_DST=${{ inputs.push-post-state }}

.github/workflows/c-chain-reexecution-benchmark-arc.yml

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,18 @@ on:
1515
source-block-dir:
1616
description: 'The source block directory. Supports S3 directory/zip and local directories.'
1717
required: false
18-
default: s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb.zip
18+
default: s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb/**
1919
current-state-dir:
2020
description: 'The current state directory. Supports S3 directory/zip and local directories.'
2121
required: false
22-
default: s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100.zip
22+
default: s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/**
2323
runner:
2424
description: 'Runner to execute the benchmark. Input to the runs-on field of the job.'
2525
required: false
2626
default: ubuntu-latest
27+
push-post-state:
28+
description: 'S3 location to push post-execution state directory. Skips this step if left unpopulated.'
29+
default: ''
2730

2831
schedule:
2932
- cron: '0 9 * * *' # Runs every day at 09:00 UTC (04:00 EST)
@@ -41,20 +44,21 @@ jobs:
4144
- uses: actions/checkout@v4
4245
- name: Set task parameters
4346
id: set-params
47+
shell: bash
4448
run: |
4549
if [[ "${{ github.event_name }}" == "schedule" ]]; then
4650
{
4751
echo "start-block=33000001"
4852
echo "end-block=34000000"
49-
echo "source-block-dir=s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-50m-ldb.zip"
50-
echo "current-state-dir=s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-33m/"
53+
echo "source-block-dir=s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-50m-ldb/**"
54+
echo "current-state-dir=s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-33m/**"
5155
} >> "$GITHUB_OUTPUT"
5256
elif [[ "${{ github.event_name }}" == "pull_request" ]]; then
5357
{
5458
echo "start-block=101"
5559
echo "end-block=250000"
56-
echo "source-block-dir=s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb.zip"
57-
echo "current-state-dir=s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100.zip"
60+
echo "source-block-dir=s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb/**"
61+
echo "current-state-dir=s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/**"
5862
} >> "$GITHUB_OUTPUT"
5963
else
6064
{
@@ -82,6 +86,7 @@ jobs:
8286
prometheus-username: ${{ secrets.PROMETHEUS_ID || '' }}
8387
prometheus-password: ${{ secrets.PROMETHEUS_PASSWORD || '' }}
8488
push-github-action-benchmark: ${{ github.event_name == 'schedule' }}
85-
aws-role: ${{ secrets.AWS_S3_READ_ONLY_ROLE }}
89+
aws-role: ${{ github.event.inputs.push-post-state != '' && secrets.AWS_S3_RW_ROLE || secrets.AWS_S3_READ_ONLY_ROLE }}
8690
aws-region: 'us-east-2'
8791
github-token: ${{ secrets.GITHUB_TOKEN }}
92+
push-post-state: ${{ github.event.inputs.push-post-state }}

.github/workflows/c-chain-reexecution-benchmark-gh-runner.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ jobs:
1717
with:
1818
start-block: 101
1919
end-block: 250000
20-
source-block-dir: s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb.zip
21-
current-state-dir: s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100.zip
20+
source-block-dir: s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb/**
21+
current-state-dir: s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/**
2222
prometheus-username: ${{ secrets.PROMETHEUS_ID || '' }}
2323
prometheus-password: ${{ secrets.PROMETHEUS_PASSWORD || '' }}
2424
aws-role: ${{ secrets.AWS_S3_READ_ONLY_ROLE }}

Taskfile.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ tasks:
101101
LOCAL_SRC: '{{.LOCAL_SRC}}'
102102
S3_DST: '{{.S3_DST}}'
103103
cmds:
104-
- cmd: s5cmd cp {{.LOCAL_SRC}} {{.S3_DST}}
104+
- cmd: bash -x ./scripts/copy_dir.sh {{.LOCAL_SRC}} {{.S3_DST}}
105105

106106
generate-mocks:
107107
desc: Generates testing mocks
@@ -131,8 +131,8 @@ tasks:
131131
desc: Imports the C-Chain block and state data to re-execute. Defaults to import the first 200 and the current state created with the default config of the C-Chain (hashdb).
132132
vars:
133133
EXECUTION_DATA_DIR: '{{.EXECUTION_DATA_DIR}}'
134-
SOURCE_BLOCK_DIR: '{{.SOURCE_BLOCK_DIR | default "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-200.zip"}}'
135-
CURRENT_STATE_DIR: '{{.CURRENT_STATE_DIR | default "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100.zip"}}'
134+
SOURCE_BLOCK_DIR: '{{.SOURCE_BLOCK_DIR | default "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-200-ldb/**"}}'
135+
CURRENT_STATE_DIR: '{{.CURRENT_STATE_DIR | default "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/**"}}'
136136
cmds:
137137
- task: import-s3-to-dir
138138
vars:
@@ -203,8 +203,8 @@ tasks:
203203
desc: Combines import-cchain-reexecute-range and reexecute-cchain-range
204204
vars:
205205
EXECUTION_DATA_DIR: '{{.EXECUTION_DATA_DIR}}'
206-
SOURCE_BLOCK_DIR: '{{.SOURCE_BLOCK_DIR | default "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb.zip"}}'
207-
CURRENT_STATE_DIR: '{{.CURRENT_STATE_DIR | default "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100.zip"}}'
206+
SOURCE_BLOCK_DIR: '{{.SOURCE_BLOCK_DIR | default "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb/"}}'
207+
CURRENT_STATE_DIR: '{{.CURRENT_STATE_DIR | default "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/"}}'
208208
START_BLOCK: '{{.START_BLOCK | default "101"}}'
209209
END_BLOCK: '{{.END_BLOCK | default "250000"}}'
210210
LABELS: '{{.LABELS | default ""}}'

scripts/copy_dir.sh

Lines changed: 58 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,37 +9,31 @@ set -euo pipefail
99

1010
if [ $# -ne 2 ]; then
1111
echo "Usage: $0 <source_directory> <destination_directory>"
12-
echo "S3 Example: $0 's3://bucket1/path1' /dest/dir"
12+
echo "Import from S3 Example: $0 's3://bucket1/path1' /dest/dir"
13+
echo "Export to S3 Example: $0 '/local/path1' 's3://bucket2/path2'"
1314
echo "Local Example: $0 '/local/path1' /dest/dir"
1415
exit 1
1516
fi
1617

1718
SRC="$1"
1819
DST="$2"
1920

20-
# Ensure destination directory exists
21-
mkdir -p "$DST"
22-
2321
# Function to copy from a single source to destination
24-
copy_source() {
22+
function copy_source() {
2523
local source="$1"
2624
local dest="$2"
2725

2826
# Check if either the source or the destination is an S3 path (starts with s3://)
29-
if [[ "$source" == s3://* ]]; then
30-
echo "Copying from S3: $source -> $dest"
27+
if [[ "$source" == s3://* || "$dest" == s3://* ]]; then
3128
# Use s5cmd to copy to or from S3
29+
echo "Copying from S3: $source to $dest"
3230
time s5cmd cp "$source" "$dest"
33-
34-
# If we copied a zip, extract it in place
35-
if [[ "$source" == *.zip ]]; then
36-
echo "Extracting zip file in place"
37-
time unzip "$dest"/*.zip -d "$dest"
38-
rm "$dest"/*.zip
39-
fi
4031
else
41-
echo "Copying from local filesystem: $source -> $dest"
4232
# Use cp for local filesystem with recursive support
33+
34+
# Ensure destination directory exists
35+
mkdir -p "$dest"
36+
4337
if [ -d "$source" ]; then
4438
time cp -r "$source"/* "$dest/"
4539
elif [ -f "$source" ]; then
@@ -51,4 +45,53 @@ copy_source() {
5145
fi
5246
}
5347

48+
# Function to check the destination directory does not exist to avoid
49+
# overwrites
50+
function check_dst_not_exists() {
51+
local dst="$1"
52+
53+
if [[ "$dst" == s3://* ]]; then
54+
# Validate the S3 path format as s3://<bucket-name>/<directory-name>/
55+
echo "Checking S3 path format: $dst"
56+
if ! [[ "$dst" =~ ^s3://[^/]+/([^/]+/)$ ]]; then
57+
echo "Error: Invalid S3 path format."
58+
echo "Expected format: s3://<bucket-name>/<directory-name>/"
59+
exit 1
60+
fi
61+
62+
# Note: S3 tooling does not provide a native way to check for an empty
63+
# directory. To avoid accidental overwrites, we use a best-effort, brittle
64+
# workaround that relies on the expected status code and error message
65+
# from s5cmd ls.
66+
# If the error message changes, this script would be expected to fail
67+
# by misreporting non-existent directories as existing, which means
68+
# a change in behavior would cause the script to fail to copy rather
69+
# than allow accidental overwrites.
70+
echo "Checking if S3 path exists: $dst"
71+
if ! OUTPUT=$(s5cmd ls "$dst" 2>&1); then
72+
# If the command fails, check for the expected error message.
73+
if [[ "$OUTPUT" == *"no object found"* ]]; then
74+
echo "Verified S3 destination: '$dst' is empty"
75+
else
76+
echo "Error: failed to check for contents of $dst"
77+
echo "$OUTPUT"
78+
exit 1
79+
fi
80+
else
81+
# Success indicates a non-empty destination, so we exit with an error.
82+
echo "Cannot copy to non-empty destination: '$dst':"
83+
echo "$OUTPUT"
84+
exit 1
85+
fi
86+
else
87+
echo "Checking if local path exists: $dst"
88+
if [[ -e "$dst" ]]; then
89+
echo "Local destination directory '$dst' already exists. Exiting."
90+
exit 1
91+
fi
92+
fi
93+
}
94+
95+
check_dst_not_exists "$DST"
96+
5497
copy_source "$SRC" "$DST"

tests/reexecute/c/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,10 @@ To authenticate metrics collection (enabled by default), provide the Prometheus
2828

2929
## Import Blocks
3030

31-
To import the first 200 blocks for re-execution, you can fetch the following ZIP from S3: `s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-10k-ldb.zip`:
31+
To import the first 10,000 blocks for re-execution, you can fetch the following directory from S3: `s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-10k-ldb/`:
3232

3333
```bash
34-
task import-s3-to-dir SRC=s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-10k-ldb.zip DST=$HOME/exec-data/blocks
34+
task import-s3-to-dir SRC=s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-10k-ldb/** DST=$HOME/exec-data/blocks
3535
```
3636

3737
## Create C-Chain State Snapshot
@@ -95,7 +95,7 @@ Note: if you attempt to re-execute a second time on the same data set, it will f
9595
Provide the parameters explicitly that we have just used locally:
9696

9797
```bash
98-
task reexecute-cchain-range-with-copied-data EXECUTION_DATA_DIR=$HOME/reexec-data-params SOURCE_BLOCK_DIR=s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-10k-ldb.zip CURRENT_STATE_DIR=s3://avalanchego-bootstrap-testing/cchain-current-state-test/** START_BLOCK=101 END_BLOCK=10000
98+
task reexecute-cchain-range-with-copied-data EXECUTION_DATA_DIR=$HOME/reexec-data-params SOURCE_BLOCK_DIR=s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-10k-ldb/** CURRENT_STATE_DIR=s3://avalanchego-bootstrap-testing/cchain-current-state-test/** START_BLOCK=101 END_BLOCK=10000
9999
```
100100

101101
## Run Default C-Chain Benchmark

0 commit comments

Comments
 (0)