Skip to content

Commit f38dc62

Browse files
authored
e2e: Migrate duplicate node id test from kurtosis (#1573)
1 parent b293d9e commit f38dc62

File tree

12 files changed

+316
-34
lines changed

12 files changed

+316
-34
lines changed

.github/workflows/test.e2e.persistent.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ jobs:
2828
run: ./scripts/build.sh -r
2929
- name: Run e2e tests with persistent network
3030
shell: bash
31-
run: ./scripts/tests.e2e.persistent.sh ./build/avalanchego
31+
run: E2E_SERIAL=1 ./scripts/tests.e2e.persistent.sh ./build/avalanchego
3232
- name: Upload testnet network dir
3333
uses: actions/upload-artifact@v3
3434
with:

.github/workflows/test.e2e.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ jobs:
2828
run: ./scripts/build.sh -r
2929
- name: Run e2e tests
3030
shell: bash
31-
run: ./scripts/tests.e2e.sh ./build/avalanchego
31+
run: E2E_SERIAL=1 ./scripts/tests.e2e.sh ./build/avalanchego
3232
- name: Upload testnet network dir
3333
uses: actions/upload-artifact@v3
3434
with:

scripts/tests.e2e.sh

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ set -euo pipefail
55
# e.g.,
66
# ./scripts/build.sh
77
# ./scripts/tests.e2e.sh ./build/avalanchego
8+
# E2E_SERIAL=1 ./scripts/tests.e2e.sh ./build/avalanchego
89
if ! [[ "$0" =~ scripts/tests.e2e.sh ]]; then
910
echo "must be run from repository root"
1011
exit 255
@@ -43,10 +44,28 @@ else
4344
fi
4445

4546
#################################
46-
# - Execute in parallel (-p) with the ginkgo cli to minimize execution time.
47-
# The test binary by itself isn't capable of running specs in parallel.
47+
# Determine ginkgo args
48+
GINKGO_ARGS=""
49+
if [[ -n "${E2E_SERIAL:-}" ]]; then
50+
# Specs will be executed serially. This supports running e2e tests in CI
51+
# where parallel execution of tests that start new nodes beyond the
52+
# initial set of validators could overload the free tier CI workers.
53+
# Forcing serial execution in this test script instead of marking
54+
# resource-hungry tests as serial supports executing the test suite faster
55+
# on powerful development workstations.
56+
echo "tests will be executed serially to minimize resource requirements"
57+
else
58+
# Enable parallel execution of specs defined in the test binary by
59+
# default. This requires invoking the binary via the ginkgo cli
60+
# since the test binary isn't capable of executing specs in
61+
# parallel.
62+
echo "tests will be executed in parallel"
63+
GINKGO_ARGS="-p"
64+
fi
65+
66+
#################################
4867
# - Execute in random order to identify unwanted dependency
49-
ginkgo -p -v --randomize-all ./tests/e2e/e2e.test -- ${E2E_ARGS} \
68+
ginkgo ${GINKGO_ARGS} -v --randomize-all ./tests/e2e/e2e.test -- ${E2E_ARGS} \
5069
&& EXIT_CODE=$? || EXIT_CODE=$?
5170

5271
if [[ ${EXIT_CODE} -gt 0 ]]; then

tests/e2e/e2e.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@ const (
3636
// Enough for test/custom networks.
3737
DefaultConfirmTxTimeout = 20 * time.Second
3838

39+
// This interval should represent the upper bound of the time
40+
// required to start a new node on a local test network.
41+
DefaultNodeStartTimeout = 20 * time.Second
42+
3943
// A long default timeout used to timeout failed operations but
4044
// unlikely to induce flaking due to unexpected resource
4145
// contention.
@@ -168,3 +172,29 @@ func Eventually(condition func() bool, waitFor time.Duration, tick time.Duration
168172
}
169173
}
170174
}
175+
176+
// Add an ephemeral node that is only intended to be used by a single test. Its ID and
177+
// URI are not intended to be returned from the Network instance to minimize
178+
// accessibility from other tests.
179+
func AddEphemeralNode(network testnet.Network, flags testnet.FlagsMap) testnet.Node {
180+
require := require.New(ginkgo.GinkgoT())
181+
182+
node, err := network.AddEphemeralNode(ginkgo.GinkgoWriter, flags)
183+
require.NoError(err)
184+
185+
// Ensure node is stopped on teardown. It's configuration is not removed to enable
186+
// collection in CI to aid in troubleshooting failures.
187+
ginkgo.DeferCleanup(func() {
188+
tests.Outf("Shutting down ephemeral node %s\n", node.GetID())
189+
require.NoError(node.Stop())
190+
})
191+
192+
return node
193+
}
194+
195+
// Wait for the given node to report healthy.
196+
func WaitForHealthy(node testnet.Node) {
197+
ctx, cancel := context.WithTimeout(context.Background(), DefaultTimeout)
198+
defer cancel()
199+
require.NoError(ginkgo.GinkgoT(), testnet.WaitForHealthy(ctx, node))
200+
}

tests/e2e/e2e_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
// ensure test packages are scanned by ginkgo
2727
_ "github.com/ava-labs/avalanchego/tests/e2e/banff"
2828
_ "github.com/ava-labs/avalanchego/tests/e2e/c"
29+
_ "github.com/ava-labs/avalanchego/tests/e2e/faultinjection"
2930
_ "github.com/ava-labs/avalanchego/tests/e2e/p"
3031
_ "github.com/ava-labs/avalanchego/tests/e2e/static-handlers"
3132
_ "github.com/ava-labs/avalanchego/tests/e2e/x"
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
// Copyright (C) 2019-2023, Ava Labs, Inc. All rights reserved.
2+
// See the file LICENSE for licensing terms.
3+
4+
package faultinjection
5+
6+
import (
7+
"context"
8+
"fmt"
9+
10+
ginkgo "github.com/onsi/ginkgo/v2"
11+
12+
"github.com/stretchr/testify/require"
13+
14+
"github.com/ava-labs/avalanchego/api/info"
15+
"github.com/ava-labs/avalanchego/config"
16+
"github.com/ava-labs/avalanchego/ids"
17+
"github.com/ava-labs/avalanchego/tests/e2e"
18+
"github.com/ava-labs/avalanchego/tests/fixture/testnet"
19+
"github.com/ava-labs/avalanchego/utils/set"
20+
)
21+
22+
var _ = ginkgo.Describe("Duplicate node handling", func() {
23+
require := require.New(ginkgo.GinkgoT())
24+
25+
ginkgo.It("should ensure that a given Node ID (i.e. staking keypair) can be used at most once on a network", func() {
26+
network := e2e.Env.GetNetwork()
27+
nodes := network.GetNodes()
28+
29+
ginkgo.By("creating new node")
30+
node1 := e2e.AddEphemeralNode(network, testnet.FlagsMap{})
31+
e2e.WaitForHealthy(node1)
32+
33+
ginkgo.By("checking that the new node is connected to its peers")
34+
checkConnectedPeers(nodes, node1)
35+
36+
ginkgo.By("creating a second new node with the same staking keypair as the first new node")
37+
node1Flags := node1.GetConfig().Flags
38+
node2Flags := testnet.FlagsMap{
39+
config.StakingTLSKeyContentKey: node1Flags[config.StakingTLSKeyContentKey],
40+
config.StakingCertContentKey: node1Flags[config.StakingCertContentKey],
41+
// Construct a unique data dir to ensure the two nodes' data will be stored
42+
// separately. Usually the dir name is the node ID but in this one case the nodes have
43+
// the same node ID.
44+
config.DataDirKey: fmt.Sprintf("%s-second", node1Flags[config.DataDirKey]),
45+
}
46+
node2 := e2e.AddEphemeralNode(network, node2Flags)
47+
48+
ginkgo.By("checking that the second new node fails to become healthy before timeout")
49+
err := testnet.WaitForHealthy(e2e.DefaultContext(), node2)
50+
require.ErrorIs(err, context.DeadlineExceeded)
51+
52+
ginkgo.By("stopping the first new node")
53+
require.NoError(node1.Stop())
54+
55+
ginkgo.By("checking that the second new node becomes healthy within timeout")
56+
e2e.WaitForHealthy(node2)
57+
58+
ginkgo.By("checking that the second new node is connected to its peers")
59+
checkConnectedPeers(nodes, node2)
60+
})
61+
})
62+
63+
// Check that a new node is connected to existing nodes and vice versa
64+
func checkConnectedPeers(existingNodes []testnet.Node, newNode testnet.Node) {
65+
require := require.New(ginkgo.GinkgoT())
66+
67+
// Collect the node ids of the new node's peers
68+
infoClient := info.NewClient(newNode.GetProcessContext().URI)
69+
peers, err := infoClient.Peers(context.Background())
70+
require.NoError(err)
71+
peerIDs := set.NewSet[ids.NodeID](len(existingNodes))
72+
for _, peer := range peers {
73+
peerIDs.Add(peer.ID)
74+
}
75+
76+
newNodeID := newNode.GetID()
77+
for _, existingNode := range existingNodes {
78+
// Check that the existing node is a peer of the new node
79+
require.True(peerIDs.Contains(existingNode.GetID()))
80+
81+
// Check that the new node is a peer
82+
infoClient := info.NewClient(existingNode.GetProcessContext().URI)
83+
peers, err := infoClient.Peers(context.Background())
84+
require.NoError(err)
85+
isPeer := false
86+
for _, peer := range peers {
87+
if peer.ID == newNodeID {
88+
isPeer = true
89+
break
90+
}
91+
}
92+
require.True(isPeer)
93+
}
94+
}

tests/fixture/testnet/common.go

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// Copyright (C) 2019-2023, Ava Labs, Inc. All rights reserved.
2+
// See the file LICENSE for licensing terms.
3+
4+
package testnet
5+
6+
import (
7+
"context"
8+
"errors"
9+
"fmt"
10+
"time"
11+
)
12+
13+
const (
14+
// DefaultNodeTickerInterval is the interval at which WaitForHealthy
// re-checks a node's health.
DefaultNodeTickerInterval = 50 * time.Millisecond
15+
)
16+
17+
// ErrNotRunning is tolerated by WaitForHealthy: a health check failing with
// this error is retried rather than treated as fatal.
// NOTE(review): presumably returned by Node.IsHealthy for a node that has not
// started yet — confirm against the Node implementations.
var ErrNotRunning = errors.New("not running")
18+
19+
// WaitForHealthy blocks until Node.IsHealthy returns true or an error (including context timeout) is observed.
20+
func WaitForHealthy(ctx context.Context, node Node) error {
21+
if _, ok := ctx.Deadline(); !ok {
22+
return fmt.Errorf("unable to wait for health for node %q with a context without a deadline", node.GetID())
23+
}
24+
ticker := time.NewTicker(DefaultNodeTickerInterval)
25+
defer ticker.Stop()
26+
27+
for {
28+
healthy, err := node.IsHealthy(ctx)
29+
if err != nil && !errors.Is(err, ErrNotRunning) {
30+
return fmt.Errorf("failed to wait for health of node %q: %w", node.GetID(), err)
31+
}
32+
if healthy {
33+
return nil
34+
}
35+
36+
select {
37+
case <-ctx.Done():
38+
return fmt.Errorf("failed to wait for health of node %q before timeout: %w", node.GetID(), ctx.Err())
39+
case <-ticker.C:
40+
}
41+
}
42+
}

tests/fixture/testnet/interfaces.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
package testnet
55

66
import (
7+
"context"
8+
"io"
9+
710
"github.com/ava-labs/avalanchego/ids"
811
"github.com/ava-labs/avalanchego/node"
912
)
@@ -12,11 +15,14 @@ import (
1215
type Network interface {
1316
// GetConfig returns the network's configuration.
GetConfig() NetworkConfig
1417
// GetNodes returns the network's nodes.
// NOTE(review): ephemeral nodes are presumably excluded — confirm against
// the implementation.
GetNodes() []Node
18+
// AddEphemeralNode adds a node configured with the given flags and returns
// it, or an error if the node could not be added.
// NOTE(review): w presumably receives progress output — confirm against the
// implementation.
AddEphemeralNode(w io.Writer, flags FlagsMap) (Node, error)
1519
}
1620

1721
// Defines node capabilities supportable regardless of how a network is orchestrated.
1822
type Node interface {
1923
// GetID returns the node's ID.
GetID() ids.NodeID
2024
// GetConfig returns the node's configuration.
GetConfig() NodeConfig
2125
// GetProcessContext returns the process context of the node.
GetProcessContext() node.NodeProcessContext
26+
// IsHealthy reports whether the node is healthy. A non-nil error means the
// health check itself failed.
IsHealthy(ctx context.Context) (bool, error)
27+
// Stop stops the node, returning an error if it could not be stopped.
Stop() error
2228
}

tests/fixture/testnet/local/README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,10 @@ HOME
152152
│ └── config.json // C-Chain config for all nodes
153153
├── defaults.json // Default flags and configuration for network
154154
├── genesis.json // Genesis for all nodes
155-
└── network.env // Sets network dir env to simplify use of network
155+
├── network.env // Sets network dir env to simplify use of network
156+
└── ephemeral // Parent directory for ephemeral nodes (e.g. created by tests)
157+
└── NodeID-FdxnAvr4jK9XXAwsYZPgWAHW2QnwSZ // Data dir for an ephemeral node
158+
└── ...
156159
157160
```
158161

tests/fixture/testnet/local/config.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ const (
2020
DefaultNetworkStartTimeout = 2 * time.Minute
2121
DefaultNodeInitTimeout = 10 * time.Second
2222
DefaultNodeStopTimeout = 5 * time.Second
23-
DefaultNodeTickerInterval = 50 * time.Millisecond
2423
)
2524

2625
// A set of flags appropriate for local testing.

0 commit comments

Comments
 (0)