Skip to content

Commit 15c496b

Browse files
committed
[tmpnet] Deploy collectors with golang to simplify cross-repo use
Previously, prometheus and promtail were installed and launched with bash scripts. Migrating installation to nix and launch to golang enables directly sharing the functionality with subnet-evm and hypersdk. No more having to copy and maintain copies of the scripts in multiple repos.
1 parent 827d7ea commit 15c496b

File tree

9 files changed

+279
-225
lines changed

9 files changed

+279
-225
lines changed

.github/actions/run-monitored-tmpnet-cmd/action.yml

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -36,23 +36,6 @@ inputs:
3636
runs:
3737
using: composite
3838
steps:
39-
- name: Start prometheus
40-
# Only run for the original repo; a forked repo won't have access to the monitoring credentials
41-
if: (inputs.prometheus_username != '')
42-
shell: bash
43-
# Assumes calling project has a nix flake that ensures a compatible prometheus
44-
run: nix develop --impure --command bash -x ./scripts/run_prometheus.sh
45-
env:
46-
PROMETHEUS_USERNAME: ${{ inputs.prometheus_username }}
47-
PROMETHEUS_PASSWORD: ${{ inputs.prometheus_password }}
48-
- name: Start promtail
49-
if: (inputs.prometheus_username != '')
50-
shell: bash
51-
# Assumes calling project has a nix flake that ensures a compatible promtail
52-
run: nix develop --impure --command bash -x ./scripts/run_promtail.sh
53-
env:
54-
LOKI_USERNAME: ${{ inputs.loki_username }}
55-
LOKI_PASSWORD: ${{ inputs.loki_password }}
5639
- name: Notify of metrics availability
5740
if: (inputs.prometheus_username != '')
5841
shell: bash
@@ -65,6 +48,10 @@ runs:
6548
shell: bash
6649
run: ${{ inputs.run }}
6750
env:
51+
LOKI_USERNAME: ${{ inputs.loki_username }}
52+
LOKI_PASSWORD: ${{ inputs.loki_password }}
53+
PROMETHEUS_USERNAME: ${{ inputs.prometheus_username }}
54+
PROMETHEUS_PASSWORD: ${{ inputs.prometheus_password }}
6855
GH_REPO: ${{ inputs.repository_owner }}/${{ inputs.repository_name }}
6956
GH_WORKFLOW: ${{ inputs.workflow }}
7057
GH_RUN_ID: ${{ inputs.run_id }}

.github/workflows/ci.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ jobs:
6262
- name: Run e2e tests
6363
uses: ./.github/actions/run-monitored-tmpnet-cmd
6464
with:
65-
run: E2E_SERIAL=1 ./scripts/tests.e2e.sh --delay-network-shutdown
65+
run: E2E_SERIAL=1 nix develop --impure --command bash -x ./scripts/tests.e2e.sh --enable-collectors
6666
prometheus_username: ${{ secrets.PROMETHEUS_ID || '' }}
6767
prometheus_password: ${{ secrets.PROMETHEUS_PASSWORD || '' }}
6868
loki_username: ${{ secrets.LOKI_ID || '' }}
@@ -87,7 +87,7 @@ jobs:
8787
- name: Run e2e tests with existing network
8888
uses: ./.github/actions/run-monitored-tmpnet-cmd
8989
with:
90-
run: E2E_SERIAL=1 ./scripts/tests.e2e.existing.sh --delay-network-shutdown
90+
run: E2E_SERIAL=1 nix develop --impure --command bash -x ./scripts/tests.e2e.existing.sh --enable-collectors
9191
prometheus_username: ${{ secrets.PROMETHEUS_ID || '' }}
9292
prometheus_password: ${{ secrets.PROMETHEUS_PASSWORD || '' }}
9393
loki_username: ${{ secrets.LOKI_ID || '' }}
@@ -112,8 +112,8 @@ jobs:
112112
- name: Run e2e tests
113113
uses: ./.github/actions/run-monitored-tmpnet-cmd
114114
with:
115-
run: ./scripts/tests.upgrade.sh
116-
filter_by_owner: avalanchego-e2e
115+
run: nix develop --impure --command bash -x ./scripts/tests.upgrade.sh
116+
filter_by_owner: avalanchego-upgrade
117117
prometheus_username: ${{ secrets.PROMETHEUS_ID || '' }}
118118
prometheus_password: ${{ secrets.PROMETHEUS_PASSWORD || '' }}
119119
loki_username: ${{ secrets.LOKI_ID || '' }}

scripts/run_prometheus.sh

Lines changed: 0 additions & 93 deletions
This file was deleted.

scripts/run_promtail.sh

Lines changed: 0 additions & 91 deletions
This file was deleted.

tests/fixture/e2e/env.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,10 @@ func NewTestEnvironment(tc tests.TestContext, flagVars *FlagVars, desiredNetwork
130130
}
131131
}
132132

133+
if flagVars.EnableCollectors() {
134+
require.NoError(tmpnet.EnsureCollectorsRunning(tc.Log()))
135+
}
136+
133137
// Start a new network
134138
if network == nil {
135139
network = desiredNetwork

tests/fixture/e2e/flags.go

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,20 +12,16 @@ import (
1212
"github.com/ava-labs/avalanchego/tests/fixture/tmpnet"
1313
)
1414

15-
// Ensure that this value takes into account the scrape_interval
16-
// defined in scripts/run_prometheus.sh.
17-
const networkShutdownDelay = 12 * time.Second
18-
1915
type FlagVars struct {
20-
avalancheGoExecPath string
21-
pluginDir string
22-
networkDir string
23-
reuseNetwork bool
24-
delayNetworkShutdown bool
25-
startNetwork bool
26-
stopNetwork bool
27-
restartNetwork bool
28-
nodeCount int
16+
avalancheGoExecPath string
17+
pluginDir string
18+
networkDir string
19+
reuseNetwork bool
20+
enableCollectors bool
21+
startNetwork bool
22+
stopNetwork bool
23+
restartNetwork bool
24+
nodeCount int
2925
}
3026

3127
func (v *FlagVars) AvalancheGoExecPath() string {
@@ -54,10 +50,14 @@ func (v *FlagVars) RestartNetwork() bool {
5450
return v.restartNetwork
5551
}
5652

53+
func (v *FlagVars) EnableCollectors() bool {
54+
return v.enableCollectors
55+
}
56+
5757
func (v *FlagVars) NetworkShutdownDelay() time.Duration {
58-
if v.delayNetworkShutdown {
58+
if v.enableCollectors {
5959
// Only return a non-zero value if the delay is enabled.
60-
return networkShutdownDelay
60+
return tmpnet.NetworkShutdownDelay
6161
}
6262
return 0
6363
}
@@ -121,10 +121,10 @@ func RegisterFlags() *FlagVars {
121121
"[optional] restart an existing network previously started with --reuse-network. Useful for ensuring a network is running with the current state of binaries on disk. Ignored if a network is not already running or --stop-network is provided.",
122122
)
123123
flag.BoolVar(
124-
&vars.delayNetworkShutdown,
125-
"delay-network-shutdown",
124+
&vars.enableCollectors,
125+
"enable-collectors",
126126
false,
127-
"[optional] whether to delay network shutdown to allow a final metrics scrape.",
127+
"[optional] whether to enable collectors of logs and metrics from nodes of the temporary network.",
128128
)
129129
flag.BoolVar(
130130
&vars.startNetwork,

tests/fixture/e2e/metrics_link.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ var _ = ginkgo.AfterEach(func() {
4949
// Extend the end time by the shutdown delay (a proxy for the metrics
5050
// scrape interval) to maximize the chances of the specified duration
5151
// including all metrics relevant to the current spec.
52-
endTime := time.Now().Add(networkShutdownDelay).UnixMilli()
52+
endTime := time.Now().Add(tmpnet.NetworkShutdownDelay).UnixMilli()
5353
metricsLink := tmpnet.MetricsLinkForNetwork(
5454
env.GetNetwork().UUID,
5555
strconv.FormatInt(startTime, 10),

tests/fixture/tmpnet/node_process.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,12 @@ func (p *NodeProcess) getProcess() (*os.Process, error) {
225225
return nil, nil
226226
}
227227

228-
proc, err := os.FindProcess(p.pid)
228+
return getProcess(p.pid)
229+
}
230+
231+
// getProcess retrieves the process if it is running.
232+
func getProcess(pid int) (*os.Process, error) {
233+
proc, err := os.FindProcess(pid)
229234
if err != nil {
230235
return nil, fmt.Errorf("failed to find process: %w", err)
231236
}

0 commit comments

Comments
 (0)