Skip to content

Commit 7416d06

Browse files
authored
Add genesis sync test to CI (#7561)
* #7550 Use existing code from @jimmygchen in #7530 and modify for genesis sync test. Thanks @jimmygchen !
1 parent 8c6abc0 commit 7416d06

File tree

5 files changed

+241
-3
lines changed

5 files changed

+241
-3
lines changed

.github/workflows/local-testnet.yml

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ jobs:
2020

2121
- name: Build Docker image
2222
run: |
23-
docker build --build-arg FEATURES=portable -t lighthouse:local .
23+
docker build --build-arg FEATURES=portable,spec-minimal -t lighthouse:local .
2424
docker save lighthouse:local -o lighthouse-docker.tar
2525
2626
- name: Upload Docker image artifact
@@ -213,6 +213,49 @@ jobs:
213213
scripts/local_testnet/logs
214214
retention-days: 3
215215

216+
# Test syncing from genesis on a local testnet. Aims to cover forward sync over both short and long distances.
217+
genesis-sync-test:
218+
name: genesis-sync-test-${{ matrix.fork }}-${{ matrix.offline_secs }}s
219+
runs-on: ubuntu-latest
220+
needs: dockerfile-ubuntu
221+
if: contains(github.event.pull_request.labels.*.name, 'syncing')
222+
strategy:
223+
matrix:
224+
fork: [electra, fulu]
225+
offline_secs: [120, 300]
226+
steps:
227+
- uses: actions/checkout@v4
228+
229+
- name: Install Kurtosis
230+
run: |
231+
echo "deb [trusted=yes] https://apt.fury.io/kurtosis-tech/ /" | sudo tee /etc/apt/sources.list.d/kurtosis.list
232+
sudo apt update
233+
sudo apt install -y kurtosis-cli
234+
kurtosis analytics disable
235+
236+
- name: Download Docker image artifact
237+
uses: actions/download-artifact@v4
238+
with:
239+
name: lighthouse-docker
240+
path: .
241+
242+
- name: Load Docker image
243+
run: docker load -i lighthouse-docker.tar
244+
245+
- name: Run the genesis sync test script
246+
run: |
247+
./genesis-sync.sh "sync-${{ matrix.fork }}-${{ matrix.offline_secs }}s" "genesis-sync-config-${{ matrix.fork }}.yaml" "${{ matrix.fork }}" "${{ matrix.offline_secs }}"
248+
working-directory: scripts/tests
249+
250+
- name: Upload logs artifact
251+
if: always()
252+
uses: actions/upload-artifact@v4
253+
with:
254+
name: logs-genesis-sync-${{ matrix.fork }}-${{ matrix.offline_secs }}s
255+
path: |
256+
scripts/local_testnet/logs
257+
retention-days: 3
258+
216259
# This job succeeds ONLY IF all others succeed. It is used by the merge queue to determine whether
217260
# a PR is safe to merge. New jobs should be added here.
218261
local-testnet-success:
@@ -228,5 +271,5 @@ jobs:
228271
- uses: actions/checkout@v4
229272
- name: Check that success job is dependent on all others
230273
run: |
231-
exclude_jobs='checkpoint-sync-test'
274+
exclude_jobs='checkpoint-sync-test|genesis-sync-test'
232275
./scripts/ci/check-success-job.sh ./.github/workflows/local-testnet.yml local-testnet-success "$exclude_jobs"

scripts/local_testnet/start_local_testnet.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ fi
8181
if [ "$BUILD_IMAGE" = true ]; then
8282
echo "Building Lighthouse Docker image."
8383
ROOT_DIR="$SCRIPT_DIR/../.."
84-
docker build --build-arg FEATURES=portable -f $ROOT_DIR/Dockerfile -t $LH_IMAGE_NAME $ROOT_DIR
84+
docker build --build-arg FEATURES=portable,spec-minimal -f $ROOT_DIR/Dockerfile -t $LH_IMAGE_NAME $ROOT_DIR
8585
else
8686
echo "Not rebuilding Lighthouse Docker image."
8787
fi
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Kurtosis config file for testing sync on a local devnet.
2+
participants:
3+
- cl_type: lighthouse
4+
cl_image: lighthouse:local
5+
count: 2
6+
# nodes without validators, used for testing sync.
7+
- cl_type: lighthouse
8+
cl_image: lighthouse:local
9+
supernode: true # Electra has no supernodes; set here for future-proofing
10+
validator_count: 0
11+
- cl_type: lighthouse
12+
cl_image: lighthouse:local
13+
supernode: false
14+
validator_count: 0
15+
network_params:
16+
seconds_per_slot: 6
17+
electra_fork_epoch: 0
18+
preset: "minimal"
19+
additional_services:
20+
- tx_fuzz
21+
- spamoor
22+
global_log_level: debug
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Kurtosis config file for testing sync on a local devnet.
2+
participants:
3+
- cl_type: lighthouse
4+
cl_image: lighthouse:local
5+
count: 2
6+
# nodes without validators, used for testing sync.
7+
- cl_type: lighthouse
8+
cl_image: lighthouse:local
9+
supernode: true
10+
validator_count: 0
11+
- cl_type: lighthouse
12+
cl_image: lighthouse:local
13+
supernode: false
14+
validator_count: 0
15+
network_params:
16+
seconds_per_slot: 6
17+
fulu_fork_epoch: 0
18+
preset: "minimal"
19+
additional_services:
20+
- tx_fuzz
21+
- spamoor
22+
global_log_level: debug

scripts/tests/genesis-sync.sh

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Genesis sync test on a local network.
4+
#
5+
# Start a local testnet, shut down non-validator nodes for a period, then restart them
6+
# and monitor their sync progress from genesis to head.
7+
SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
8+
9+
ENCLAVE_NAME=${1:-genesis-sync-testnet}
10+
CONFIG=${2:-$SCRIPT_DIR/genesis-sync-config-electra.yaml}
11+
FORK_TYPE=${3:-electra} # electra or fulu
12+
OFFLINE_DURATION_SECS=${4:-120} # how long (in seconds) the non-validating nodes stay stopped
13+
14+
# Test configuration
15+
# ------------------------------------------------------
16+
# Interval for polling the /lighthouse/syncing endpoint for sync status
17+
# Kept short so that intermediate sync progress is visible in the output
18+
POLL_INTERVAL_SECS=0.5
19+
# Timeout for this test, if the nodes fail to sync, fail the test.
20+
TIMEOUT_MINS=5
21+
TIMEOUT_SECS=$((TIMEOUT_MINS * 60))
22+
# ------------------------------------------------------
23+
24+
echo "Starting genesis sync test with:"
25+
echo " Fork: $FORK_TYPE"
26+
echo " Offline duration: ${OFFLINE_DURATION_SECS}s"
27+
28+
# Polls a node's sync status
29+
# Queries one node's /lighthouse/syncing endpoint and prints its sync status.
#
# Arguments:
#   $1 - key into the node_urls associative array ("supernode" or "fullnode")
#
# Reads the global node_urls map. When the endpoint reports the plain string
# "Synced", the node is recorded as finished via mark_node_complete.
poll_node() {
  local node_type=$1
  local url=${node_urls[$node_type]}
  # Keep the scratch variables local so they do not leak into the caller.
  local response sync_state status fields

  # Bound each request so that an unresponsive node cannot stall the polling
  # loop past the overall test timeout.
  response=$(curl -s --max-time 5 "${url}/lighthouse/syncing" 2>/dev/null)

  if [ -z "$response" ] || [ "$response" = "null" ]; then
    echo "${node_type} status: No response or null response"
    return
  fi

  # .data is either an object keyed by the state name or a plain string.
  sync_state=$(echo "$response" | jq -r 'if (.data | type) == "object" then "object" else "string" end' 2>/dev/null)

  if [ "$sync_state" = "object" ]; then
    status=$(echo "$response" | jq -r '.data | keys[0] // "Unknown"')
    # Pass the key as a jq variable rather than splicing it into the filter
    # text, so that an unexpected key cannot break or alter the jq program.
    fields=$(echo "$response" | jq -r --arg status "$status" '.data[$status] | to_entries | map("\(.key): \(.value)") | join(", ")')
    echo "${node_type} status: ${status}, ${fields}"
  else
    status=$(echo "$response" | jq -r '.data' 2>/dev/null)
    echo "${node_type} status: ${status:-Unknown}"

    # The test is complete when the node is synced
    if [ "$status" = "Synced" ]; then
      mark_node_complete "$node_type"
    fi
  fi
}
57+
58+
# Marks a node as complete and records its completion time
59+
# Records that a node has finished syncing, once per node.
#
# Arguments:
#   $1 - key into the node_completed / node_complete_time associative arrays
#
# On the first call for a node this flips its node_completed entry to true,
# stores the current epoch time and prints the seconds elapsed since
# sync_start_time. Later calls for the same node do nothing.
mark_node_complete() {
  local finished_node=$1

  # Already recorded: nothing to do (and still report success).
  [ "${node_completed[$finished_node]}" = false ] || return 0

  node_completed[$finished_node]=true
  node_complete_time[$finished_node]=$(date +%s)
  echo "${finished_node} completed sync in $((node_complete_time[$finished_node] - sync_start_time)) seconds"
}
67+
68+
# Stops the local testnet and terminates the script.
#
# Arguments:
#   $1 - exit code for the script; defaults to 1 so that a call without an
#        argument can never be reported as a success
#
# NOTE(review): despite the name, nothing here dumps logs directly; presumably
# stop_local_testnet.sh takes care of that - confirm.
exit_and_dump_logs() {
  local exit_code=${1:-1}
  echo "Shutting down..."
  # Quoted so that a checkout path or enclave name containing spaces survives.
  "$SCRIPT_DIR/../local_testnet/stop_local_testnet.sh" "$ENCLAVE_NAME"
  echo "Test completed with exit code $exit_code."
  exit "$exit_code"
}
75+
76+
# Start the nodes
77+
# Launch the testnet for this enclave with the selected Kurtosis config.
# All expansions are quoted so that paths containing spaces are passed intact.
# NOTE(review): "-b false" presumably skips rebuilding the Docker image (CI
# loads a prebuilt one) - confirm against start_local_testnet.sh.
if ! "$SCRIPT_DIR/../local_testnet/start_local_testnet.sh" -e "$ENCLAVE_NAME" -b false -n "$CONFIG"; then
  echo "Failed to start local testnet"
  exit_and_dump_logs 1
fi
82+
83+
# Wait for 10s before stopping non-validating nodes
84+
sleep 10
85+
86+
# These are non validating nodes
87+
# Kurtosis service names of the two non-validating nodes.
# NOTE(review): presumably these are the 3rd and 4th participants of the
# Kurtosis config (the first entry has count 2) - confirm the naming scheme.
supernode="cl-3-lighthouse-geth"
fullnode="cl-4-lighthouse-geth"

# Stop the non-validator nodes. If a node cannot be stopped the test would not
# exercise any sync at all, so treat that as a failure instead of carrying on.
for service in "$supernode" "$fullnode"; do
  if ! kurtosis service stop "$ENCLAVE_NAME" "$service"; then
    echo "Failed to stop ${service}"
    exit_and_dump_logs 1
  fi
done
93+
94+
echo "Non-validator nodes stopped. Waiting ${OFFLINE_DURATION_SECS} seconds..."
95+
96+
# Display the time every 10s when the nodes are stopped
97+
# Count down the offline period, reporting progress after every step.
# Each step is at most 10s; the last one is shortened to whatever is left so
# that a duration which is not a multiple of 10 neither oversleeps nor prints
# a negative remaining time.
remaining_time=$OFFLINE_DURATION_SECS
while [ "$remaining_time" -gt 0 ]; do
  sleep_step=$(( remaining_time < 10 ? remaining_time : 10 ))
  sleep "$sleep_step"
  remaining_time=$((remaining_time - sleep_step))
  echo "Nodes are stopped for $((OFFLINE_DURATION_SECS - remaining_time))s, ${remaining_time}s remains..."
done
103+
104+
echo "Resuming non-validator nodes..."
105+
106+
# Resume the non validating nodes
107+
# Bring both non-validating nodes back online. Arguments are quoted so that
# they are always passed to kurtosis as single words.
for service in "$supernode" "$fullnode"; do
  kurtosis service start "$ENCLAVE_NAME" "$service"
done

# Sync timing starts right after the restart commands, before the port lookups
# below, so the reported durations include those lookups.
sync_start_time=$(date +%s)

# Get beacon API URLs for non validating nodes for query
supernode_url=$(kurtosis port print "$ENCLAVE_NAME" "$supernode" http)
fullnode_url=$(kurtosis port print "$ENCLAVE_NAME" "$fullnode" http)

# Per-node state, keyed by "supernode" / "fullnode":
#   node_urls          - beacon API base URL used by poll_node
#   node_completed     - "true" once the node has reported Synced
#   node_complete_time - epoch seconds at which that happened
declare -A node_urls=(
  [supernode]="$supernode_url"
  [fullnode]="$fullnode_url"
)
declare -A node_completed=(
  [supernode]=false
  [fullnode]=false
)
declare -A node_complete_time=()
126+
127+
echo "Polling sync status until nodes are synced or timeout of ${TIMEOUT_MINS} mins"
128+
129+
# Poll the unfinished nodes until both are marked complete. The timeout is
# checked at the top of every iteration and ends the script with exit code 1.
until [ "${node_completed[supernode]}" != false ] && [ "${node_completed[fullnode]}" != false ]; do
  elapsed=$(( $(date +%s) - sync_start_time ))

  if (( elapsed >= TIMEOUT_SECS )); then
    echo "ERROR: Nodes timed out syncing after ${TIMEOUT_MINS} minutes. Exiting."
    exit_and_dump_logs 1
  fi

  # Only query nodes that have not reported Synced yet.
  for node in supernode fullnode; do
    if [ "${node_completed[$node]}" = false ]; then
      poll_node "$node"
    fi
  done

  sleep "$POLL_INTERVAL_SECS"
done
147+
148+
# Both nodes are synced: print each node's sync duration, measured from
# sync_start_time, then shut the testnet down and exit successfully.
supernode_secs=$((node_complete_time[supernode] - sync_start_time))
fullnode_secs=$((node_complete_time[fullnode] - sync_start_time))

echo "Genesis sync test complete! Both supernode and fullnode have synced successfully."
echo "Supernode time: ${supernode_secs} seconds"
echo "Fullnode time: ${fullnode_secs} seconds"
exit_and_dump_logs 0

0 commit comments

Comments
 (0)