Skip to content

Commit 2b94124

Browse files
committed
query rejection - add integration test, fix query_step_limit
Signed-off-by: Erlan Zholdubai uulu <erlanz@amazon.com>
1 parent b4737ef commit 2b94124

File tree

6 files changed

+424
-51
lines changed

6 files changed

+424
-51
lines changed

docs/configuration/config-file-reference.md

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3287,9 +3287,11 @@ query_rejection:
32873287
# CLI flag: -frontend.query-rejection.enabled
32883288
[enabled: <boolean> | default = false]
32893289

3290-
# List of query attributes that queries will be matched against. Query will be
3291-
# matched against each of those query attributes separately and if query
3292-
# matches any of them then it will be rejected.
3290+
# List of query_attributes to match and reject queries. A query is rejected if
3291+
# it matches any query_attribute in this list. Each query_attribute has
3292+
# several properties (e.g., regex, time_window, user_agent), and all specified
3293+
# properties must match for a query_attribute to be considered a match. Only
3294+
# the specified keys are checked, and an AND operator is applied to them.
32933295
[query_attributes: <list of QueryAttribute> | default = []]
32943296

32953297
# Duration to delay the evaluation of rules to ensure the underlying metrics
@@ -5352,9 +5354,12 @@ limits:
53525354
# priority level. Value between 0 and 1 will be used as a percentage.
53535355
[reserved_queriers: <float> | default = 0]
53545356
5355-
# List of query attributes that queries will be matched against. Query will be
5356-
# matched against each of those query attributes separately and if query matches
5357-
# any of them then it will be assigned to this priority.
5357+
# List of query_attributes to match and assign priority to queries. A query is
5358+
# assigned to this priority if it matches any query_attribute in this list. Each
5359+
# query_attribute has several properties (e.g., regex, time_window, user_agent),
5360+
# and all specified properties must match for a query_attribute to be considered
5361+
# a match. Only the specified keys are checked, and an AND operator is applied
5362+
# to them.
53585363
[query_attributes: <list of QueryAttribute> | default = []]
53595364
```
53605365

@@ -5379,8 +5384,8 @@ time_window:
53795384
# checked.
53805385
[end: <int> | default = 0]
53815386
5382-
# Queries with time range that is within this limits will match. If not set, it
5383-
# won't be checked.
5387+
# Query time range should be within this limit to match. If not set, it won't be
5388+
# checked.
53845389
time_range_limit:
53855390
# Query time range should be above or equal to this value to match. If set to
53865391
# 0, it won't be checked.
@@ -5390,8 +5395,9 @@ time_range_limit:
53905395
# 0, it won't be checked.
53915396
[max: <int> | default = 0]
53925397
5393-
# Limit that query step should be within. It will check subquery steps as well.
5394-
# If not set, it won't be checked.
5398+
# For range query, step should be within this limit to match. For instant query,
5399+
# subQuery step should be within this limit to match. If not set, it won't be
5400+
# checked. This attribute won't be applied to metadata queries.
53955401
query_step_limit:
53965402
# Query step should be above or equal to this value to match. If set to 0, it
53975403
# won't be checked.
@@ -5403,16 +5409,17 @@ query_step_limit:
54035409
54045410
# Regex that User-Agent header of the request should match. If not set, it won't
54055411
# be checked.
5406-
[user_agent: <string> | default = ""]
5412+
[user_agent_regex: <string> | default = ""]
54075413
54085414
# Grafana includes X-Dashboard-Uid header in query requests. If this field is
54095415
# provided then X-Dashboard-Uid header of request should match this value. If
5410-
# not set, it won't be checked.
5416+
# not set, it won't be checked. This attribute won't be applied to metadata
5417+
# queries.
54115418
[dashboard_uid: <string> | default = ""]
54125419
54135420
# Grafana includes X-Panel-Id header in query requests. If this field is
54145421
# provided then X-Panel-Id header of request should match this value. If not
5415-
# set, it won't be checked.
5422+
# set, it won't be checked. This attribute won't be applied to metadata queries.
54165423
[panel_id: <string> | default = ""]
54175424
```
54185425

integration/e2ecortex/client.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ type Client struct {
5050
httpClient *http.Client
5151
querierClient promv1.API
5252
orgID string
53+
headers []string
5354
}
5455

5556
// NewClient makes a new Cortex client
@@ -59,6 +60,7 @@ func NewClient(
5960
alertmanagerAddress string,
6061
rulerAddress string,
6162
orgID string,
63+
headers ...string,
6264
) (*Client, error) {
6365
// Create querier API client
6466
querierAPIClient, err := promapi.NewClient(promapi.Config{
@@ -78,6 +80,7 @@ func NewClient(
7880
httpClient: &http.Client{},
7981
querierClient: promv1.NewAPI(querierAPIClient),
8082
orgID: orgID,
83+
headers: headers,
8184
}
8285

8386
if alertmanagerAddress != "" {
@@ -409,6 +412,10 @@ func (c *Client) query(addr string) (*http.Response, []byte, error) {
409412

410413
req.Header.Set("X-Scope-OrgID", c.orgID)
411414

415+
for i := 0; i < len(c.headers); i += 2 {
416+
req.Header.Set(c.headers[i], c.headers[i+1])
417+
}
418+
412419
retries := backoff.New(ctx, backoff.Config{
413420
MinBackoff: 1 * time.Second,
414421
MaxBackoff: 2 * time.Second,

integration/query_frontend_test.go

Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
package integration
55

66
import (
7+
"bytes"
8+
"context"
79
"crypto/x509"
810
"crypto/x509/pkix"
911
"fmt"
@@ -15,6 +17,10 @@ import (
1517
"testing"
1618
"time"
1719

20+
"github.com/thanos-io/objstore/providers/s3"
21+
22+
"github.com/cortexproject/cortex/pkg/querier/tripperware"
23+
1824
v1 "github.com/prometheus/client_golang/api/prometheus/v1"
1925
"github.com/prometheus/common/model"
2026
"github.com/prometheus/prometheus/model/labels"
@@ -585,3 +591,221 @@ func TestQueryFrontendMaxQueryLengthLimits(t *testing.T) {
585591
require.Equal(t, http.StatusBadRequest, resp.StatusCode)
586592
require.Contains(t, string(body), "the query time range exceeds the limit")
587593
}
594+
595+
func TestQueryFrontendQueryRejection(t *testing.T) {
596+
configFileName := "runtime-config.yaml"
597+
598+
s, err := e2e.NewScenario(networkName)
599+
require.NoError(t, err)
600+
defer s.Close()
601+
602+
// Start dependencies.
603+
consul := e2edb.NewConsul()
604+
minio := e2edb.NewMinio(9000, bucketName, rulestoreBucketName)
605+
require.NoError(t, s.StartAndWaitReady(consul, minio))
606+
607+
// Configure the blocks storage to frequently compact TSDB head
608+
// and ship blocks to the storage.
609+
flags := mergeFlags(BlocksStorageFlags(), map[string]string{
610+
"-runtime-config.backend": "s3",
611+
"-runtime-config.s3.access-key-id": e2edb.MinioAccessKey,
612+
"-runtime-config.s3.secret-access-key": e2edb.MinioSecretKey,
613+
"-runtime-config.s3.bucket-name": bucketName,
614+
"-runtime-config.s3.endpoint": fmt.Sprintf("%s-minio-9000:9000", networkName),
615+
"-runtime-config.s3.insecure": "true",
616+
"-runtime-config.file": configFileName,
617+
"-runtime-config.reload-period": "1s",
618+
})
619+
620+
client, err := s3.NewBucketWithConfig(nil, s3.Config{
621+
Endpoint: minio.HTTPEndpoint(),
622+
Insecure: true,
623+
Bucket: bucketName,
624+
AccessKey: e2edb.MinioAccessKey,
625+
SecretKey: e2edb.MinioSecretKey,
626+
}, "runtime-config-test")
627+
628+
require.NoError(t, err)
629+
630+
// update runtime config
631+
newRuntimeConfig := []byte(`overrides:
632+
user-1:
633+
query_rejection:
634+
enabled: true
635+
query_attributes:
636+
- regex: .*rate.*
637+
query_step_limit:
638+
min: 6s
639+
max: 20m
640+
dashboard_uid: "dash123"
641+
`)
642+
require.NoError(t, client.Upload(context.Background(), configFileName, bytes.NewReader(newRuntimeConfig)))
643+
time.Sleep(2 * time.Second)
644+
645+
queryFrontend := e2ecortex.NewQueryFrontendWithConfigFile("query-frontend", "", flags, "")
646+
require.NoError(t, s.Start(queryFrontend))
647+
648+
flags["-querier.frontend-address"] = queryFrontend.NetworkGRPCEndpoint()
649+
650+
// Start all other services.
651+
ingester := e2ecortex.NewIngesterWithConfigFile("ingester", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), "", flags, "")
652+
querier := e2ecortex.NewQuerierWithConfigFile("querier", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), "", flags, "")
653+
654+
require.NoError(t, s.StartAndWaitReady(querier, ingester))
655+
require.NoError(t, s.WaitReady(queryFrontend))
656+
657+
// Wait until querier have updated the ring.
658+
require.NoError(t, querier.WaitSumMetrics(e2e.Equals(512), "cortex_ring_tokens_total"))
659+
660+
c, err := e2ecortex.NewClient("", queryFrontend.HTTPEndpoint(), "", "", "user-1", "X-Dashboard-Uid", "dash123", "User-Agent", "grafana-agent/v0.19.0")
661+
require.NoError(t, err)
662+
663+
now := time.Now()
664+
// We expect request to be rejected as it matches query_attribute of query_rejection (contains rate, step limit within 6s and 20h and contains dashboard header dash123).
665+
// Query shouldn't be checked against attributes that is not provided in query_attribute config(time_window, time_range_limit, user_agent_regex, panel_id)
666+
resp, body, err := c.QueryRaw(`min_over_time( rate(http_requests_total[5m])[30m:1m] )`, now)
667+
require.NoError(t, err)
668+
require.Equal(t, http.StatusUnprocessableEntity, resp.StatusCode)
669+
require.Contains(t, string(body), tripperware.QueryRejectErrorMessage)
670+
671+
// We expect request to succeed as it doesn't match query rejection(no step limit).
672+
resp, body, err = c.QueryRaw(`rate(test[30d])`, now)
673+
require.NoError(t, err)
674+
require.Equal(t, http.StatusOK, resp.StatusCode)
675+
require.NotContains(t, string(body), tripperware.QueryRejectErrorMessage)
676+
677+
// update runtime config
678+
newRuntimeConfig = []byte(`overrides:
679+
user-1:
680+
query_rejection:
681+
enabled: true
682+
query_attributes:
683+
- regex: .*rate.*
684+
time_window:
685+
start: 12h
686+
end: 0h
687+
time_range_limit:
688+
min: 2h
689+
max: 6h
690+
query_step_limit:
691+
min: 22m
692+
dashboard_uid: "dash123"
693+
user_agent_regex: "grafana.*"
694+
`)
695+
require.NoError(t, client.Upload(context.Background(), configFileName, bytes.NewReader(newRuntimeConfig)))
696+
time.Sleep(2 * time.Second)
697+
698+
// We expect request to be rejected, as it matches query_attribute (contains 'rate', within time_window(11h-8h), within time range(3h), within step limit(25m>22m), contains dashboard header(dash123) and user-agent matches regex).
699+
resp, body, err = c.QueryRangeRaw(`rate(test[1m])`, now.Add(-11*time.Hour), now.Add(-8*time.Hour), 25*time.Minute)
700+
require.NoError(t, err)
701+
require.Equal(t, http.StatusUnprocessableEntity, resp.StatusCode)
702+
require.Contains(t, string(body), tripperware.QueryRejectErrorMessage)
703+
704+
// We expect request not to be rejected, as it doesn't match query step limit (min is 22m, and actual step is 20m)
705+
resp, body, err = c.QueryRangeRaw(`rate(test[1m])`, now.Add(-11*time.Hour), now.Add(-8*time.Hour), 20*time.Minute)
706+
require.NoError(t, err)
707+
require.Equal(t, http.StatusOK, resp.StatusCode)
708+
require.NotContains(t, string(body), tripperware.QueryRejectErrorMessage)
709+
710+
// We expect request not to be rejected, as it goes beyond time_window(-15h is outside of 12h-0h window)
711+
resp, body, err = c.QueryRangeRaw(`rate(test[1m])`, now.Add(-15*time.Hour), now.Add(-8*time.Hour), 25*time.Minute)
712+
require.NoError(t, err)
713+
require.Equal(t, http.StatusOK, resp.StatusCode)
714+
require.NotContains(t, string(body), tripperware.QueryRejectErrorMessage)
715+
716+
// We expect request not to be rejected as it goes beyond time-range(9h is bigger than max time range of 6h)
717+
resp, body, err = c.QueryRangeRaw(`rate(test[1m])`, now.Add(-11*time.Hour), now.Add(-2*time.Hour), 25*time.Minute)
718+
require.NoError(t, err)
719+
require.Equal(t, http.StatusOK, resp.StatusCode)
720+
require.NotContains(t, string(body), tripperware.QueryRejectErrorMessage)
721+
722+
// We expect request not to be rejected, as it doesn't match regex (doesn't contain 'rate')
723+
resp, body, err = c.QueryRangeRaw(`increase(test[1m])`, now.Add(-11*time.Hour), now.Add(-8*time.Hour), 25*time.Minute)
724+
require.NoError(t, err)
725+
require.Equal(t, http.StatusOK, resp.StatusCode)
726+
require.NotContains(t, string(body), tripperware.QueryRejectErrorMessage)
727+
728+
goClient, err := e2ecortex.NewClient("", queryFrontend.HTTPEndpoint(), "", "", "user-1", "X-Dashboard-Uid", "dash123", "User-Agent", "go-client/v0.19.0")
729+
require.NoError(t, err)
730+
731+
// We expect request not to be rejected, as it doesn't match user-agent regex (doesn't contain 'grafana')
732+
resp, body, err = goClient.QueryRangeRaw(`rate(test[1m])`, now.Add(-11*time.Hour), now.Add(-8*time.Hour), 25*time.Minute)
733+
require.NoError(t, err)
734+
require.Equal(t, http.StatusOK, resp.StatusCode)
735+
require.NotContains(t, string(body), tripperware.QueryRejectErrorMessage)
736+
737+
newDashboardClient, err := e2ecortex.NewClient("", queryFrontend.HTTPEndpoint(), "", "", "user-1", "X-Dashboard-Uid", "new-dashboard", "User-Agent", "grafana-agent/v0.19.0")
738+
require.NoError(t, err)
739+
740+
// We expect request not to be rejected, as it doesn't match grafana dashboard uid ('dash123' != 'new-dashboard')
741+
resp, body, err = newDashboardClient.QueryRangeRaw(`rate(test[1m])`, now.Add(-11*time.Hour), now.Add(-8*time.Hour), 25*time.Minute)
742+
require.NoError(t, err)
743+
require.Equal(t, http.StatusOK, resp.StatusCode)
744+
require.NotContains(t, string(body), tripperware.QueryRejectErrorMessage)
745+
746+
metadataClient, err := e2ecortex.NewClient("", queryFrontend.HTTPEndpoint(), "", "", "user-1", "User-Agent", "grafana-agent/v0.19.0")
747+
require.NoError(t, err)
748+
749+
// We expect request to be rejected for series request, as it has at least one matcher matching regex(contains 'rate'), within time_window(11h-8h, within time_range(3h).
750+
// query_step_limit, dashboard_uid, panel_id fields are ignored for metadata queries.
751+
resp, body, err = metadataClient.SeriesRaw([]string{`http_requests_rate_total{job="prometheus"}`}, now.Add(-11*time.Hour), now.Add(-8*time.Hour))
752+
require.NoError(t, err)
753+
require.Equal(t, http.StatusUnprocessableEntity, resp.StatusCode)
754+
require.Contains(t, string(body), tripperware.QueryRejectErrorMessage)
755+
756+
// We expect request not to be rejected for series request as it does not have at least one matcher matching regex
757+
resp, body, err = metadataClient.SeriesRaw([]string{`http_requests_total{job="prometheus"}`}, now.Add(-11*time.Hour), now.Add(-8*time.Hour))
758+
require.NoError(t, err)
759+
require.Equal(t, http.StatusOK, resp.StatusCode)
760+
require.NotContains(t, string(body), tripperware.QueryRejectErrorMessage)
761+
762+
// We expect request to be rejected for labels request, as it has at least one matcher matching regex(contains 'rate'), within time_window(11h-8h, within time_range(3h).
763+
// query_step_limit, dashboard_uid, panel_id properties are ignored for metadata queries.
764+
resp, body, err = metadataClient.LabelNamesRaw([]string{`http_requests_rate_total{job="prometheus"}`}, now.Add(-11*time.Hour), now.Add(-8*time.Hour))
765+
require.NoError(t, err)
766+
require.Equal(t, http.StatusUnprocessableEntity, resp.StatusCode)
767+
require.Contains(t, string(body), tripperware.QueryRejectErrorMessage)
768+
769+
// We expect request not to be rejected if label/label_values request has no matcher, but rejection query_attribute has regex property specified.
770+
// All the provided query_attributes fields that can be applied to metadata queries should match
771+
resp, body, err = metadataClient.LabelNamesRaw([]string{}, now.Add(-11*time.Hour), now.Add(-8*time.Hour))
772+
require.NoError(t, err)
773+
require.Equal(t, http.StatusOK, resp.StatusCode)
774+
require.NotContains(t, string(body), tripperware.QueryRejectErrorMessage)
775+
776+
// We expect request not to be rejected if label/label_values request doesn't provide time but rejection query_attribute has time_window property
777+
resp, body, err = metadataClient.LabelNamesRaw([]string{`http_requests_rate_total{job="prometheus"}`}, time.Time{}, time.Time{})
778+
require.NoError(t, err)
779+
require.Equal(t, http.StatusOK, resp.StatusCode)
780+
require.NotContains(t, string(body), tripperware.QueryRejectErrorMessage)
781+
782+
// We expect request not to be rejected if label/label_values request doesn't provide one of the time(startTime or endTime) but rejection query_attribute has both time_window limits
783+
resp, body, err = metadataClient.LabelNamesRaw([]string{`http_requests_rate_total{job="prometheus"}`}, now.Add(-11*time.Hour), time.Time{})
784+
require.NoError(t, err)
785+
require.Equal(t, http.StatusOK, resp.StatusCode)
786+
require.NotContains(t, string(body), tripperware.QueryRejectErrorMessage)
787+
788+
// update runtime config
789+
newRuntimeConfig = []byte(`overrides:
790+
user-1:
791+
query_rejection:
792+
enabled: true
793+
query_attributes:
794+
- regex: .*rate.*
795+
time_window:
796+
start: 12h
797+
end: 0h
798+
- user_agent_regex: "grafana.*"
799+
`)
800+
require.NoError(t, client.Upload(context.Background(), configFileName, bytes.NewReader(newRuntimeConfig)))
801+
time.Sleep(2 * time.Second)
802+
803+
// We expect request to be rejected if any of the listed query_attributes configuration matches. Two query_attributes provided here, and doesn't match regex from first one, but matches second one.
804+
// query rejection should consider only provided attributes.
805+
// There is no regex, time_window, time_range_limit on second query_attribute. Only user_agent_regex provided so any query with this agent should be rejected
806+
resp, body, err = metadataClient.LabelValuesRaw("cluster", []string{}, time.Time{}, time.Time{})
807+
require.NoError(t, err)
808+
require.Equal(t, http.StatusUnprocessableEntity, resp.StatusCode)
809+
require.Contains(t, string(body), tripperware.QueryRejectErrorMessage)
810+
811+
}

0 commit comments

Comments
 (0)