Skip to content

Commit ccd3382

Browse files
committed
[tmpnet] Deploy collectors with golang to simplify cross-repo use
Previously, prometheus and promtail were installed and launched with bash scripts. Migrating installation to nix and launch to golang enables directly sharing the functionality with subnet-evm and hypersdk. No more having to copy and maintain copies of the scripts in multiple repos.
1 parent 757b07a commit ccd3382

File tree

9 files changed

+280
-230
lines changed

9 files changed

+280
-230
lines changed

.github/actions/run-monitored-tmpnet-cmd/action.yml

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -38,23 +38,6 @@ inputs:
3838
runs:
3939
using: composite
4040
steps:
41-
- name: Start prometheus
42-
# Only run for the original repo; a forked repo won't have access to the monitoring credentials
43-
if: (inputs.prometheus_username != '')
44-
shell: bash
45-
# Assumes calling project has a nix flake that ensures a compatible prometheus
46-
run: nix develop --impure --command bash -x ./scripts/run_prometheus.sh
47-
env:
48-
PROMETHEUS_USERNAME: ${{ inputs.prometheus_username }}
49-
PROMETHEUS_PASSWORD: ${{ inputs.prometheus_password }}
50-
- name: Start promtail
51-
if: (inputs.prometheus_username != '')
52-
shell: bash
53-
# Assumes calling project has a nix flake that ensures a compatible promtail
54-
run: nix develop --impure --command bash -x ./scripts/run_promtail.sh
55-
env:
56-
LOKI_USERNAME: ${{ inputs.loki_username }}
57-
LOKI_PASSWORD: ${{ inputs.loki_password }}
5841
- name: Notify of metrics availability
5942
if: (inputs.prometheus_username != '')
6043
shell: bash
@@ -67,7 +50,11 @@ runs:
6750
shell: bash
6851
run: ${{ inputs.run }}
6952
env:
70-
TMPNET_DELAY_NETWORK_SHUTDOWN: true # Ensure shutdown waits for a final metrics scrape
53+
TMPNET_ENABLE_COLLECTORS: true
54+
LOKI_USERNAME: ${{ inputs.loki_username }}
55+
LOKI_PASSWORD: ${{ inputs.loki_password }}
56+
PROMETHEUS_USERNAME: ${{ inputs.prometheus_username }}
57+
PROMETHEUS_PASSWORD: ${{ inputs.prometheus_password }}
7158
GH_REPO: ${{ inputs.repository_owner }}/${{ inputs.repository_name }}
7259
GH_WORKFLOW: ${{ inputs.workflow }}
7360
GH_RUN_ID: ${{ inputs.run_id }}

.github/workflows/ci.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ jobs:
6262
- name: Run e2e tests
6363
uses: ./.github/actions/run-monitored-tmpnet-cmd
6464
with:
65-
run: E2E_SERIAL=1 ./scripts/tests.e2e.sh
65+
run: E2E_SERIAL=1 nix develop --impure --command bash -x ./scripts/tests.e2e.sh
6666
artifact_name: e2e-tmpnet-data
6767
filter_by_owner: avalanchego-e2e
6868
prometheus_username: ${{ secrets.PROMETHEUS_ID || '' }}
@@ -84,7 +84,7 @@ jobs:
8484
- name: Run e2e tests with existing network
8585
uses: ./.github/actions/run-monitored-tmpnet-cmd
8686
with:
87-
run: E2E_SERIAL=1 ./scripts/tests.e2e.existing.sh
87+
run: E2E_SERIAL=1 nix develop --impure --command bash -x ./scripts/tests.e2e.existing.sh
8888
artifact_name: e2e-existing-network-tmpnet-data
8989
prometheus_username: ${{ secrets.PROMETHEUS_ID || '' }}
9090
prometheus_password: ${{ secrets.PROMETHEUS_PASSWORD || '' }}
@@ -105,7 +105,7 @@ jobs:
105105
- name: Run e2e tests
106106
uses: ./.github/actions/run-monitored-tmpnet-cmd
107107
with:
108-
run: ./scripts/tests.upgrade.sh
108+
run: nix develop --impure --command bash -x ./scripts/tests.upgrade.sh
109109
artifact_name: upgrade-tmpnet-data
110110
prometheus_username: ${{ secrets.PROMETHEUS_ID || '' }}
111111
prometheus_password: ${{ secrets.PROMETHEUS_PASSWORD || '' }}

scripts/run_prometheus.sh

Lines changed: 0 additions & 93 deletions
This file was deleted.

scripts/run_promtail.sh

Lines changed: 0 additions & 91 deletions
This file was deleted.

tests/fixture/e2e/env.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,10 @@ func NewTestEnvironment(tc tests.TestContext, flagVars *FlagVars, desiredNetwork
130130
}
131131
}
132132

133+
if flagVars.EnableCollectors() {
134+
require.NoError(tmpnet.EnsureCollectorsRunning(tc.Log()))
135+
}
136+
133137
// Start a new network
134138
if network == nil {
135139
network = desiredNetwork

tests/fixture/e2e/flags.go

Lines changed: 19 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -15,24 +15,16 @@ import (
1515
"github.com/ava-labs/avalanchego/tests/fixture/tmpnet"
1616
)
1717

18-
const (
19-
// Ensure that this value takes into account the scrape_interval
20-
// defined in scripts/run_prometheus.sh.
21-
networkShutdownDelay = 12 * time.Second
22-
23-
delayNetworkShutdownEnvName = "TMPNET_DELAY_NETWORK_SHUTDOWN"
24-
)
25-
2618
type FlagVars struct {
27-
avalancheGoExecPath string
28-
pluginDir string
29-
networkDir string
30-
reuseNetwork bool
31-
delayNetworkShutdown bool
32-
startNetwork bool
33-
stopNetwork bool
34-
restartNetwork bool
35-
nodeCount int
19+
avalancheGoExecPath string
20+
pluginDir string
21+
networkDir string
22+
reuseNetwork bool
23+
enableCollectors bool
24+
startNetwork bool
25+
stopNetwork bool
26+
restartNetwork bool
27+
nodeCount int
3628
}
3729

3830
func (v *FlagVars) AvalancheGoExecPath() (string, error) {
@@ -80,10 +72,14 @@ func (v *FlagVars) RestartNetwork() bool {
8072
return v.restartNetwork
8173
}
8274

75+
func (v *FlagVars) EnableCollectors() bool {
76+
return v.enableCollectors
77+
}
78+
8379
func (v *FlagVars) NetworkShutdownDelay() time.Duration {
84-
if v.delayNetworkShutdown {
80+
if v.enableCollectors {
8581
// Only return a non-zero value if the delay is enabled.
86-
return networkShutdownDelay
82+
return tmpnet.NetworkShutdownDelay
8783
}
8884
return 0
8985
}
@@ -147,10 +143,10 @@ func RegisterFlags() *FlagVars {
147143
"[optional] restart an existing network previously started with --reuse-network. Useful for ensuring a network is running with the current state of binaries on disk. Ignored if a network is not already running or --stop-network is provided.",
148144
)
149145
flag.BoolVar(
150-
&vars.delayNetworkShutdown,
151-
"delay-network-shutdown",
152-
cast.ToBool(GetEnvWithDefault(delayNetworkShutdownEnvName, "false")),
153-
"[optional] whether to delay network shutdown to allow a final metrics scrape.",
146+
&vars.enableCollectors,
147+
"enable-collectors",
148+
cast.ToBool(GetEnvWithDefault("TMPNET_ENABLE_COLLECTORS", "false")),
149+
"[optional] whether to enable collectors of logs and metrics from nodes of the temporary network.",
154150
)
155151
flag.BoolVar(
156152
&vars.startNetwork,

tests/fixture/e2e/metrics_link.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ var _ = ginkgo.AfterEach(func() {
4949
// Extend the end time by the shutdown delay (a proxy for the metrics
5050
// scrape interval) to maximize the chances of the specified duration
5151
// including all metrics relevant to the current spec.
52-
endTime := time.Now().Add(networkShutdownDelay).UnixMilli()
52+
endTime := time.Now().Add(tmpnet.NetworkShutdownDelay).UnixMilli()
5353
metricsLink := tmpnet.MetricsLinkForNetwork(
5454
env.GetNetwork().UUID,
5555
strconv.FormatInt(startTime, 10),

tests/fixture/tmpnet/node_process.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,12 @@ func (p *NodeProcess) getProcess() (*os.Process, error) {
226226
return nil, nil
227227
}
228228

229-
proc, err := os.FindProcess(p.pid)
229+
return getProcess(p.pid)
230+
}
231+
232+
// getProcess retrieves the process if it is running.
233+
func getProcess(pid int) (*os.Process, error) {
234+
proc, err := os.FindProcess(pid)
230235
if err != nil {
231236
return nil, fmt.Errorf("failed to find process: %w", err)
232237
}

0 commit comments

Comments
 (0)