From 1a75e05e435d34fa8af6eb922dc6b445077671db Mon Sep 17 00:00:00 2001 From: Sebastian Rabenhorst <4246554+rabenhorst@users.noreply.github.com> Date: Thu, 30 May 2024 00:03:54 +0200 Subject: [PATCH] Add `pingdom_min_request_limit` internal metric (#15) * Added pingdom_min_request_limit internal metric * Naming * Added description in readme * Renamed remaining requests metric * Fixed description of pingdom_rate_limit_remaining_requests in readme --- README.md | 23 +++++++++-------- cmd/pingdom-exporter/main.go | 15 ++++++++++- pkg/pingdom/check.go | 40 +++++++++++++++++++++++++---- pkg/pingdom/check_test.go | 49 +++++++++++++++++++++++++++++++++++- 4 files changed, 109 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index e50a217..d74f4e6 100644 --- a/README.md +++ b/README.md @@ -61,17 +61,18 @@ on how to build your own image and push it to your private registry. ## Exported Metrics -| Metric Name | Description | -| --------------------------------------------------- | ------------------------------------------------------------------------------- | -| `pingdom_up` | Was the last query on Pingdom API successful | -| `pingdom_uptime_status` | The current status of the check (1: up, 0: down) | -| `pingdom_uptime_response_time_seconds` | The response time of last test, in seconds | -| `pingdom_slo_period_seconds` | Outage check period, in seconds (see `-outage-check-period` flag) | -| `pingdom_outages_total` | Number of outages within the outage check period | -| `pingdom_down_seconds` | Total down time within the outage check period, in seconds | -| `pingdom_up_seconds` | Total up time within the outage check period, in seconds | -| `pingdom_uptime_slo_error_budget_total_seconds` | Maximum number of allowed downtime, in seconds, according to the uptime SLO | -| `pingdom_uptime_slo_error_budget_available_seconds` | Number of seconds of downtime we can still have without breaking the uptime SLO | +| Metric Name | Description | +| --------------------------------------------------- |----------------------------------------------------------------------------------------------------------| +| `pingdom_up` | Was the last query on Pingdom API successful | +| `pingdom_rate_limit_remaining_requests` | The remaining requests allowed before hitting the short-term or long-term rate limit in the Pingdom API. | +| `pingdom_uptime_status` | The current status of the check (1: up, 0: down) | +| `pingdom_uptime_response_time_seconds` | The response time of last test, in seconds | +| `pingdom_slo_period_seconds` | Outage check period, in seconds (see `-outage-check-period` flag) | +| `pingdom_outages_total` | Number of outages within the outage check period | +| `pingdom_down_seconds` | Total down time within the outage check period, in seconds | +| `pingdom_up_seconds` | Total up time within the outage check period, in seconds | +| `pingdom_uptime_slo_error_budget_total_seconds` | Maximum number of allowed downtime, in seconds, according to the uptime SLO | +| `pingdom_uptime_slo_error_budget_available_seconds` | Number of seconds of downtime we can still have without breaking the uptime SLO | ## Development diff --git a/cmd/pingdom-exporter/main.go b/cmd/pingdom-exporter/main.go index 48e40a5..2847b86 100644 --- a/cmd/pingdom-exporter/main.go +++ b/cmd/pingdom-exporter/main.go @@ -33,6 +33,12 @@ var ( nil, nil, ) + pingdomRateLimitRemainingRequestsDesc = prometheus.NewDesc( + "pingdom_rate_limit_remaining_requests", + "Tracks the remaining requests allowed before hitting the short-term or long-term rate limit in the Pingdom API.", + nil, nil, + ) + pingdomOutageCheckPeriodDesc = prometheus.NewDesc( "pingdom_slo_period_seconds", "Outage check period, in seconds", @@ -96,6 +102,7 @@ type pingdomCollector struct { func (pc pingdomCollector) Describe(ch chan<- *prometheus.Desc) { ch <- pingdomUpDesc + ch <- pingdomRateLimitRemainingRequestsDesc ch <- pingdomOutageCheckPeriodDesc ch <- pingdomCheckStatusDesc ch <- pingdomCheckResponseTimeDesc @@ -110,11 +117,17 @@ func (pc pingdomCollector) Collect(ch chan<- prometheus.Metric) { outageCheckPeriodDuration := time.Hour * time.Duration(24*outageCheckPeriod) outageCheckPeriodSecs := float64(outageCheckPeriodDuration / time.Second) - checks, err := pc.client.Checks.List(map[string]string{ + checks, minReqLimit, err := pc.client.Checks.List(map[string]string{ "include_tags": "true", "tags": pc.client.Tags, }) + ch <- prometheus.MustNewConstMetric( + pingdomRateLimitRemainingRequestsDesc, + prometheus.GaugeValue, + minReqLimit, + ) + if err != nil { fmt.Fprintf(os.Stderr, "Error getting checks: %v", err) ch <- prometheus.MustNewConstMetric( diff --git a/pkg/pingdom/check.go b/pkg/pingdom/check.go index 0e54911..ac5f4c2 100644 --- a/pkg/pingdom/check.go +++ b/pkg/pingdom/check.go @@ -3,6 +3,18 @@ package pingdom import ( "encoding/json" "io/ioutil" + "math" + "net/http" + "regexp" + "strconv" +) + +var ( + reqLimitHeaderKeys = []string{ + "req-limit-short", + "req-limit-long", + } + reqLimitRe = regexp.MustCompile(`Remaining: (\d+) Time until reset: (\d+)`) ) // CheckService provides an interface to Pingdom checks. @@ -13,24 +25,26 @@ type CheckService struct { // List returns a list of checks from Pingdom. // This returns type CheckResponse rather than Check since the // Pingdom API does not return a complete representation of a check. -func (cs *CheckService) List(params ...map[string]string) ([]CheckResponse, error) { +func (cs *CheckService) List(params ...map[string]string) ([]CheckResponse, float64, error) { param := map[string]string{} if len(params) == 1 { param = params[0] } req, err := cs.client.NewRequest("GET", "/checks", param) if err != nil { - return nil, err + return nil, 0, err } resp, err := cs.client.client.Do(req) if err != nil { - return nil, err + return nil, 0, err } defer resp.Body.Close() + minRequestLimit := minRequestLimitFromHeader(resp.Header) + if err := validateResponse(resp); err != nil { - return nil, err + return nil, minRequestLimit, err } bodyBytes, _ := ioutil.ReadAll(resp.Body) @@ -38,5 +52,21 @@ func (cs *CheckService) List(params ...map[string]string) ([]CheckResponse, erro m := &listChecksJSONResponse{} err = json.Unmarshal([]byte(bodyString), &m) - return m.Checks, err + return m.Checks, minRequestLimit, err +} + +func minRequestLimitFromHeader(header http.Header) float64 { + minRequestLimit := math.MaxFloat64 + + for _, key := range reqLimitHeaderKeys { + matches := reqLimitRe.FindStringSubmatch(header.Get(key)) + if len(matches) > 0 { + limit, err := strconv.ParseFloat(matches[1], 64) + if err == nil && limit < minRequestLimit { + minRequestLimit = limit + } + } + } + + return minRequestLimit } diff --git a/pkg/pingdom/check_test.go b/pkg/pingdom/check_test.go index b34d458..c2c8f57 100644 --- a/pkg/pingdom/check_test.go +++ b/pkg/pingdom/check_test.go @@ -2,6 +2,7 @@ package pingdom import ( "fmt" + "math" "net/http" "testing" @@ -14,6 +15,7 @@ func TestCheckServiceList(t *testing.T) { mux.HandleFunc("/checks", func(w http.ResponseWriter, r *http.Request) { testMethod(t, r, "GET") + w.Header().Set("req-limit-long", "Remaining: 12 Time until reset: 34") fmt.Fprint(w, `{ "checks": [ { @@ -141,7 +143,52 @@ func TestCheckServiceList(t *testing.T) { }, } - checks, err := client.Checks.List() + checks, minRequestLimit, err := client.Checks.List() assert.NoError(t, err) assert.Equal(t, want, checks) + assert.EqualValues(t, 12, minRequestLimit) +} + +func TestMinRequestLimitFromResp(t *testing.T) { + tc := []struct { + header http.Header + expected float64 + }{ + { + header: http.Header{}, + expected: math.MaxFloat64, + }, + { + header: http.Header{ + "Req-Limit-Short": []string{"Remaining: 12 Time until reset: 34"}, + }, + expected: 12, + }, + { + header: http.Header{ + "Req-Limit-Long": []string{"Remaining: 56 Time until reset: 78"}, + }, + expected: 56, + }, + { + header: http.Header{ + "Req-Limit-Long": []string{"Remaining: 0 Time until reset: 78"}, + "Req-Limit-Short": []string{"Remaining: 12 Time until reset: 34"}, + }, + expected: 0, + }, + { + header: http.Header{ + "Req-Limit-Long": []string{"invalid"}, + }, + expected: math.MaxFloat64, + }, + } + + for _, tt := range tc { + t.Run(fmt.Sprintf("%v", tt.header), func(t *testing.T) { + actual := minRequestLimitFromHeader(tt.header) + assert.Equal(t, tt.expected, actual) + }) + } }