Skip to content

Commit

Permalink
fix: metric check sp
Browse files (browse the repository at this point in the history)
  • Loading branch information
BarryTong65 committed Jun 3, 2024
1 parent ac790ef commit 834e1f4
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 4 deletions.
4 changes: 2 additions & 2 deletions base/gfspvgmgr/virtual_group_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -806,7 +806,7 @@ func (checker *HealthChecker) checkSPHealth(sp *sptypes.StorageProvider) bool {

resp, err := client.Do(req)
duration := time.Since(start)
metrics.SPHealthCheckerDuration.WithLabelValues(SPHealthCheckerDuration, util.Uint32ToString(sp.Id)).Observe(duration.Seconds())
metrics.SPHealthCheckerTime.WithLabelValues(SPHealthCheckerDuration, util.Uint32ToString(sp.Id)).Observe(duration.Seconds())
if err != nil {
log.CtxErrorw(context.Background(), "failed to connect to sp", "sp", sp, "error", err, "duration", duration)
time.Sleep(defaultSPHealthCheckerRetryInterval)
Expand All @@ -818,7 +818,7 @@ func (checker *HealthChecker) checkSPHealth(sp *sptypes.StorageProvider) bool {
log.CtxInfow(context.Background(), "succeed to check the sp healthy", "sp", sp, "duration", duration)
return true
} else {
metrics.SPHealthCheckerFailure.WithLabelValues(SPHealthCheckerFailure, util.Uint32ToString(sp.Id)).Inc()
metrics.SPHealthCheckerFailureCounter.WithLabelValues(SPHealthCheckerFailure, util.Uint32ToString(sp.Id)).Inc()
log.CtxErrorw(context.Background(), "failed to check sp healthy", "sp", sp, "http_status_code", resp.StatusCode, "duration", duration)
time.Sleep(defaultSPHealthCheckerRetryInterval)
}
Expand Down
6 changes: 4 additions & 2 deletions pkg/metrics/metric_items.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ var MetricsItems = []prometheus.Collector{
ManagerCounter,
ManagerTime,
GCBlockNumberGauge,
SPHealthCheckerTime,
SPHealthCheckerFailureCounter,

// workflow metrics category
PerfApprovalTime,
Expand Down Expand Up @@ -247,14 +249,14 @@ var (
Name: "gc_block_number",
Help: "Track the next gc block number.",
}, []string{"gc_block_number"})
SPHealthCheckerDuration = prometheus.NewHistogramVec(
SPHealthCheckerTime = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "request_time",
Help: "Request duration in seconds.",
},
[]string{"request_time", "storage_provider"},
)
SPHealthCheckerFailure = prometheus.NewCounterVec(
SPHealthCheckerFailureCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "request_qps",
Help: "Request failure count.",
Expand Down

0 comments on commit 834e1f4

Please sign in to comment.