Skip to content

Commit

Permalink
Addressed PR feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
pinebit committed Apr 16, 2024
1 parent b1e778a commit 964e32c
Show file tree
Hide file tree
Showing 7 changed files with 335 additions and 3 deletions.
3 changes: 3 additions & 0 deletions cmd/markdown_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ All metrics contain the following labels, so they are omitted from the table bel
The 'cluster_*' labels uniquely identify a specific node's metrics which is required
when storing metrics from multiple nodes or clusters in one Prometheus instance.
Several 'relay_p2p' metrics having 'int_' prefix are produced by libp2p relay implementation.
[These](https://github.com/libp2p/go-libp2p/pull/2154) are not mentioned in this doc.
| Name | Type | Help | Labels |
|---|---|---|---|
{{ range . }}| '{{ .FQName }}' | {{ .Type }} | {{ .Help }} | '{{ Join .Labels ", " }}' |
Expand Down
90 changes: 90 additions & 0 deletions cmd/relay/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,96 @@ var (
Name: "ping_latency",
Help: "Ping latency by peer and cluster",
}, []string{"peer", "peer_cluster"})

// Relay metrics produced by libp2p.
// These are prefixed with "int_" to avoid conflicts with other metrics.

intStatus = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: "relay",
Subsystem: "p2p",
Name: "int_status",
Help: "Relay Status",
},
)

intReservationsTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "relay",
Subsystem: "p2p",
Name: "int_reservations_total",
Help: "Relay Reservation Request",
},
[]string{"type"},
)

intReservationRequestResponseStatusTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "relay",
Subsystem: "p2p",
Name: "int_reservation_request_response_status_total",
Help: "Relay Reservation Request Response Status",
},
[]string{"status"},
)

intReservationRejectionsTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "relay",
Subsystem: "p2p",
Name: "int_reservation_rejections_total",
Help: "Relay Reservation Rejected Reason",
},
[]string{"reason"},
)

intConnectionsTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "relay",
Subsystem: "p2p",
Name: "int_connections_total",
Help: "Relay Connection Total",
},
[]string{"type"},
)

intConnectionRequestResponseStatusTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "relay",
Subsystem: "p2p",
Name: "int_connection_request_response_status_total",
Help: "Relay Connection Request Status",
},
[]string{"status"},
)

intConnectionRejectionsTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "relay",
Subsystem: "p2p",
Name: "int_connection_rejections_total",
Help: "Relay Connection Rejected Reason",
},
[]string{"reason"},
)

intConnectionDurationSeconds = prometheus.NewHistogram(
prometheus.HistogramOpts{
Namespace: "relay",
Subsystem: "p2p",
Name: "int_connection_duration_seconds",
Help: "Relay Connection Duration",
},
)

intDataTransferredBytesTotal = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: "relay",
Subsystem: "p2p",
Name: "int_data_transferred_bytes_total",
Help: "Bytes Transferred Total",
},
)
)

// newBandwidthCounter returns a new bandwidth counter that stops counting when the context is cancelled.
Expand Down
159 changes: 159 additions & 0 deletions cmd/relay/metrics_tracer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
// Copyright © 2022-2024 Obol Labs Inc. Licensed under the terms of a Business Source License 1.1

package relay

import (
"time"

"github.com/libp2p/go-libp2p/p2p/metricshelper"
pbv2 "github.com/libp2p/go-libp2p/p2p/protocol/circuitv2/pb"
"github.com/libp2p/go-libp2p/p2p/protocol/circuitv2/relay"
"github.com/prometheus/client_golang/prometheus"
)

// This implementation must be kept in sync with metricsTracer in
// https://github.com/libp2p/go-libp2p/blob/master/p2p/protocol/circuitv2/relay/metrics.go

const (
requestStatusOK = "ok"
requestStatusRejected = "rejected"
requestStatusError = "error"
)

var collectors = []prometheus.Collector{
intStatus,
intReservationsTotal,
intReservationRequestResponseStatusTotal,
intReservationRejectionsTotal,
intConnectionsTotal,
intConnectionRequestResponseStatusTotal,
intConnectionRejectionsTotal,
intConnectionDurationSeconds,
intDataTransferredBytesTotal,
}

func NewMetricsTracer(promRegisterer prometheus.Registerer) relay.MetricsTracer {
metricshelper.RegisterCollectors(promRegisterer, collectors...)

return &metricsTracer{}
}

type metricsTracer struct{}

func (*metricsTracer) RelayStatus(enabled bool) {
if enabled {
intStatus.Set(1)
} else {
intStatus.Set(0)
}
}

func (*metricsTracer) ConnectionOpened() {
tags := metricshelper.GetStringSlice()
defer metricshelper.PutStringSlice(tags)
*tags = append(*tags, "opened")

intConnectionsTotal.WithLabelValues(*tags...).Add(1)
}

func (*metricsTracer) ConnectionClosed(d time.Duration) {
tags := metricshelper.GetStringSlice()
defer metricshelper.PutStringSlice(tags)
*tags = append(*tags, "closed")

intConnectionsTotal.WithLabelValues(*tags...).Add(1)
intConnectionDurationSeconds.Observe(d.Seconds())
}

func (*metricsTracer) ConnectionRequestHandled(status pbv2.Status) {
tags := metricshelper.GetStringSlice()
defer metricshelper.PutStringSlice(tags)

respStatus := getResponseStatus(status)

*tags = append(*tags, respStatus)
intConnectionRequestResponseStatusTotal.WithLabelValues(*tags...).Add(1)
if respStatus == requestStatusRejected {
*tags = (*tags)[:0]
*tags = append(*tags, getRejectionReason(status))
intConnectionRejectionsTotal.WithLabelValues(*tags...).Add(1)
}
}

func (*metricsTracer) ReservationAllowed(isRenewal bool) {
tags := metricshelper.GetStringSlice()
defer metricshelper.PutStringSlice(tags)
if isRenewal {
*tags = append(*tags, "renewed")
} else {
*tags = append(*tags, "opened")
}

intReservationsTotal.WithLabelValues(*tags...).Add(1)
}

func (*metricsTracer) ReservationClosed(cnt int) {
tags := metricshelper.GetStringSlice()
defer metricshelper.PutStringSlice(tags)
*tags = append(*tags, "closed")

intReservationsTotal.WithLabelValues(*tags...).Add(float64(cnt))
}

func (*metricsTracer) ReservationRequestHandled(status pbv2.Status) {
tags := metricshelper.GetStringSlice()
defer metricshelper.PutStringSlice(tags)

respStatus := getResponseStatus(status)

*tags = append(*tags, respStatus)
intReservationRequestResponseStatusTotal.WithLabelValues(*tags...).Add(1)
if respStatus == requestStatusRejected {
*tags = (*tags)[:0]
*tags = append(*tags, getRejectionReason(status))
intReservationRejectionsTotal.WithLabelValues(*tags...).Add(1)
}
}

func (*metricsTracer) BytesTransferred(cnt int) {
intDataTransferredBytesTotal.Add(float64(cnt))
}

func getResponseStatus(status pbv2.Status) string {
responseStatus := "unknown"
switch status {
case pbv2.Status_RESERVATION_REFUSED,
pbv2.Status_RESOURCE_LIMIT_EXCEEDED,
pbv2.Status_PERMISSION_DENIED,
pbv2.Status_NO_RESERVATION,
pbv2.Status_MALFORMED_MESSAGE:

responseStatus = requestStatusRejected
case pbv2.Status_UNEXPECTED_MESSAGE, pbv2.Status_CONNECTION_FAILED:
responseStatus = requestStatusError
case pbv2.Status_OK:
responseStatus = requestStatusOK
default:
}

return responseStatus
}

func getRejectionReason(status pbv2.Status) string {
reason := "unknown"
switch status {
case pbv2.Status_RESERVATION_REFUSED:
reason = "ip constraint violation"
case pbv2.Status_RESOURCE_LIMIT_EXCEEDED:
reason = "resource limit exceeded"
case pbv2.Status_PERMISSION_DENIED:
reason = "permission denied"
case pbv2.Status_NO_RESERVATION:
reason = "no reservation"
case pbv2.Status_MALFORMED_MESSAGE:
reason = "malformed message"
default:
}

return reason
}
78 changes: 78 additions & 0 deletions cmd/relay/metrics_tracer_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// Copyright © 2022-2024 Obol Labs Inc. Licensed under the terms of a Business Source License 1.1

package relay_test

import (
"strings"
"testing"

pbv2 "github.com/libp2p/go-libp2p/p2p/protocol/circuitv2/pb"
p2p_relay "github.com/libp2p/go-libp2p/p2p/protocol/circuitv2/relay"
"github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/require"

"github.com/obolnetwork/charon/app/promauto"
"github.com/obolnetwork/charon/cmd/relay"
)

func TestMetricsTracer(t *testing.T) {
// The test runs both charon and libp2p MetricsTracer instances
// and compares the metric values produced by both.
// The only difference is metric namespace and name prefixes.

charonReg, err := promauto.NewRegistry(prometheus.Labels{})
require.NoError(t, err)
charonMT := relay.NewMetricsTracer(charonReg)

libp2pReg, err := promauto.NewRegistry(prometheus.Labels{})
require.NoError(t, err)
libp2pMT := p2p_relay.NewMetricsTracer(p2p_relay.WithRegisterer(libp2pReg))

charonMT.RelayStatus(true)
libp2pMT.RelayStatus(true)
charonMT.BytesTransferred(100)
libp2pMT.BytesTransferred(100)
charonMT.ConnectionOpened()
libp2pMT.ConnectionOpened()
charonMT.ConnectionClosed(0)
libp2pMT.ConnectionClosed(0)
charonMT.ReservationRequestHandled(pbv2.Status_CONNECTION_FAILED)
libp2pMT.ReservationRequestHandled(pbv2.Status_CONNECTION_FAILED)
charonMT.ReservationAllowed(true)
libp2pMT.ReservationAllowed(true)
charonMT.ConnectionRequestHandled(pbv2.Status_CONNECTION_FAILED)
libp2pMT.ConnectionRequestHandled(pbv2.Status_CONNECTION_FAILED)
charonMT.ReservationClosed(123)
libp2pMT.ReservationClosed(123)

charonMetrics, err := charonReg.Gather()
require.NoError(t, err)

libp2pMetrics, err := libp2pReg.Gather()
require.NoError(t, err)

libp2pMetricsMap := make(map[string][]string)
for _, lm := range libp2pMetrics {
if strings.HasPrefix(lm.GetName(), "libp2p_relaysvc_") {
name := strings.TrimPrefix(lm.GetName(), "libp2p_relaysvc_")
var vals []string
for _, m := range lm.GetMetric() {
// timing fields go always last, safe to trim as they will differ
vals = append(vals, strings.Split(m.String(), "seconds:")[0])
}
libp2pMetricsMap[name] = vals
}
}

for _, cm := range charonMetrics {
if strings.HasPrefix(cm.GetName(), "relay_p2p_int_") {
name := strings.TrimPrefix(cm.GetName(), "relay_p2p_int_")
charonVals := cm.GetMetric()
libp2pVals := libp2pMetricsMap[name]
for i := 0; i < len(charonVals); i++ {
exp := strings.Split(charonVals[i].String(), "seconds:")[0]
require.Equal(t, exp, libp2pVals[i])
}
}
}
}
3 changes: 1 addition & 2 deletions cmd/relay/p2p.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,7 @@ func startP2P(ctx context.Context, config Config, key *k1.PrivateKey, reporter m
relayResources.MaxReservations = config.MaxConns
relayResources.MaxCircuits = config.MaxResPerPeer

// This enables relay metrics: https://github.com/libp2p/go-libp2p/blob/master/p2p/protocol/circuitv2/relay/metrics.go
mt := relay.NewMetricsTracer(relay.WithRegisterer(promRegistry))
mt := NewMetricsTracer(promRegistry)
relayService, err := relay.New(tcpNode, relay.WithResources(relayResources), relay.WithMetricsTracer(mt))
if err != nil {
return nil, nil, errors.Wrap(err, "new relay service")
Expand Down
2 changes: 1 addition & 1 deletion cmd/relay/relay_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,6 @@ func TestRelayMetricsExported(t *testing.T) {
}

require.Eventually(t, func() bool {
return strings.Contains(fetchMetrics(), "libp2p_relaysvc_")
return strings.Contains(fetchMetrics(), "relay_p2p_int_status")
}, 10*time.Second, time.Second, "waiting for relay service to start")
}
3 changes: 3 additions & 0 deletions docs/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ All metrics contain the following labels, so they are omitted from the table bel
The `cluster_*` labels uniquely identify a specific node`s metrics which is required
when storing metrics from multiple nodes or clusters in one Prometheus instance.

Several `relay_p2p` metrics having `int_` prefix are produced by libp2p relay implementation.
[These](https://github.com/libp2p/go-libp2p/pull/2154) are not mentioned in this doc.

| Name | Type | Help | Labels |
|---|---|---|---|
| `app_beacon_node_peers` | Gauge | Gauge set to the peer count of the upstream beacon node | |
Expand Down

0 comments on commit 964e32c

Please sign in to comment.