Skip to content

Commit

Permalink
query-frontend: make HTTP downstream tripper configurable (thanos-io#…
Browse files Browse the repository at this point in the history
…4623)

* query-frontend: make HTTP downstream tripper configurable

Make the HTTP downstream tripper configurable so that it would be
possible to set the number of maximum idle connections per host to more
than 2 so that HTTP keep-alive connections could be used on higher
loads.

Solves thanos-io#4571.

Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>

* CHANGELOG: add entry

Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
  • Loading branch information
GiedriusS authored and someshkoli committed Nov 7, 2021
1 parent 61d4320 commit d07f611
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
- [#4506](https://github.com/thanos-io/thanos/pull/4506) `Baidu BOS` object storage, see [documents](docs/storage.md#baidu-bos) for further information.
- [#4552](https://github.com/thanos-io/thanos/pull/4552) Compact: Adds `thanos_compact_downsample_duration_seconds` histogram.
- [#4594](https://github.com/thanos-io/thanos/pull/4594) reloader: Expose metrics in config reloader to give info on the last operation.
- [#4623](https://github.com/thanos-io/thanos/pull/4623) query-frontend: made HTTP downstream tripper (client) configurable via parameters `--query-frontend.downstream-tripper-config` and `--query-frontend.downstream-tripper-config-file`. If your downstream URL is localhost or 127.0.0.1 then it is strongly recommended to bump `max_idle_conns_per_host` to at least 100 so that `query-frontend` can properly use HTTP keep-alive connections and thus reduce the latency of `query-frontend` by about 20%.

### Fixed

Expand Down
50 changes: 49 additions & 1 deletion cmd/thanos/query_frontend.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@ import (
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/weaveworks/common/user"
"gopkg.in/yaml.v2"

extflag "github.com/efficientgo/tools/extkingpin"
"github.com/thanos-io/thanos/pkg/api"
"github.com/thanos-io/thanos/pkg/component"
"github.com/thanos-io/thanos/pkg/exthttp"
"github.com/thanos-io/thanos/pkg/extkingpin"
"github.com/thanos-io/thanos/pkg/extprom"
extpromhttp "github.com/thanos-io/thanos/pkg/extprom/http"
Expand Down Expand Up @@ -118,6 +120,8 @@ func registerQueryFrontend(app *extkingpin.App) {
cmd.Flag("query-frontend.downstream-url", "URL of downstream Prometheus Query compatible API.").
Default("http://localhost:9090").StringVar(&cfg.DownstreamURL)

cfg.DownstreamTripperConfig.CachePathOrContent = *extflag.RegisterPathOrContent(cmd, "query-frontend.downstream-tripper-config", "YAML file that contains downstream tripper configuration. If your downstream URL is localhost or 127.0.0.1 then it is highly recommended to increase max_idle_conns_per_host to at least 100.", extflag.WithEnvSubstitution())

cmd.Flag("query-frontend.compress-responses", "Compress HTTP responses.").
Default("false").BoolVar(&cfg.CompressResponses)

Expand All @@ -142,6 +146,41 @@ func registerQueryFrontend(app *extkingpin.App) {
})
}

// parseTransportConfiguration builds the *http.Transport used to talk to the
// downstream URL.
//
// It starts from the transport returned by exthttp.NewTransport and, when
// downstreamTripperConfContentYaml is non-empty, strictly decodes it as a
// queryfrontend.DownstreamTripperConfig and overrides the matching transport
// fields. Duration settings are applied only when greater than zero; integer
// settings are applied whenever the key is present (non-nil pointer).
//
// It returns a nil transport and a wrapped error if the YAML cannot be decoded.
func parseTransportConfiguration(downstreamTripperConfContentYaml []byte) (*http.Transport, error) {
	transport := exthttp.NewTransport()

	// Nothing configured: hand back the untouched defaults.
	if len(downstreamTripperConfContentYaml) == 0 {
		return transport, nil
	}

	var cfg queryfrontend.DownstreamTripperConfig
	if err := yaml.UnmarshalStrict(downstreamTripperConfContentYaml, &cfg); err != nil {
		return nil, errors.Wrap(err, "parsing downstream tripper config YAML file")
	}

	// Timeouts: a zero duration means "not set", so the default is kept.
	if d := time.Duration(cfg.IdleConnTimeout); d > 0 {
		transport.IdleConnTimeout = d
	}
	if d := time.Duration(cfg.ResponseHeaderTimeout); d > 0 {
		transport.ResponseHeaderTimeout = d
	}
	if d := time.Duration(cfg.TLSHandshakeTimeout); d > 0 {
		transport.TLSHandshakeTimeout = d
	}
	if d := time.Duration(cfg.ExpectContinueTimeout); d > 0 {
		transport.ExpectContinueTimeout = d
	}

	// Connection limits: pointers distinguish "absent" from an explicit value.
	if n := cfg.MaxIdleConns; n != nil {
		transport.MaxIdleConns = *n
	}
	if n := cfg.MaxIdleConnsPerHost; n != nil {
		transport.MaxIdleConnsPerHost = *n
	}
	if n := cfg.MaxConnsPerHost; n != nil {
		transport.MaxConnsPerHost = *n
	}

	return transport, nil
}

func runQueryFrontend(
g *run.Group,
logger log.Logger,
Expand Down Expand Up @@ -191,7 +230,16 @@ func runQueryFrontend(
}

// Create a downstream roundtripper.
roundTripper, err := cortexfrontend.NewDownstreamRoundTripper(cfg.DownstreamURL, http.DefaultTransport)
downstreamTripperConfContentYaml, err := cfg.DownstreamTripperConfig.CachePathOrContent.Content()
if err != nil {
return err
}
downstreamTripper, err := parseTransportConfiguration(downstreamTripperConfContentYaml)
if err != nil {
return err
}

roundTripper, err := cortexfrontend.NewDownstreamRoundTripper(cfg.DownstreamURL, downstreamTripper)
if err != nil {
return errors.Wrap(err, "setup downstream roundtripper")
}
Expand Down
31 changes: 31 additions & 0 deletions docs/components/query-frontend.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,22 @@ Query Frontend supports `--query-frontend.log-queries-longer-than` flag to log q

Naming is hard :) Please check [here](https://github.com/thanos-io/thanos/pull/2434#discussion_r408300683) to see why we chose `query-frontend` as the name.

## Recommended Downstream Tripper Configuration

You can configure the parameters of the HTTP client that `query-frontend` uses for the downstream URL with parameters `--query-frontend.downstream-tripper-config` and `--query-frontend.downstream-tripper-config-file`. If the downstream URL points to a single host, most likely a load-balancer, then it is highly recommended to increase `max_idle_conns_per_host` via these parameters to at least 100 because otherwise `query-frontend` will not be able to leverage HTTP keep-alive connections, and the latency will be 10 - 20% higher. By default, the Go HTTP client will only keep two idle connections per host.

Keys which denote a duration are strings that can end with `s` or `m` to indicate seconds or minutes respectively. All of the other keys are integers. Supported keys are:

* `idle_conn_timeout` - timeout of idle connections (string);
* `response_header_timeout` - maximum duration to wait for a response header (string);
* `tls_handshake_timeout` - maximum duration of a TLS handshake (string);
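* `expect_continue_timeout` - maximum duration to wait for the server's first response headers after fully writing the request headers when the request has an `Expect: 100-continue` header, see the [Go source code](https://github.com/golang/go/blob/912f0750472dd4f674b69ca1616bfaf377af1805/src/net/http/transport.go#L220-L226) (string);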
* `max_idle_conns` - maximum number of idle connections to all hosts (integer);
* `max_idle_conns_per_host` - maximum number of idle connections to each host (integer);
* `max_conns_per_host` - maximum number of connections to each host (integer);

You can find the default values [here](https://github.com/thanos-io/thanos/blob/55cb8ca38b3539381dc6a781e637df15c694e50a/pkg/exthttp/transport.go#L12-L27).

## Flags

```$ mdox-exec="thanos query-frontend --help"
Expand Down Expand Up @@ -170,6 +186,21 @@ Flags:
Disable request logging.
--query-frontend.compress-responses
Compress HTTP responses.
--query-frontend.downstream-tripper-config=<content>
Alternative to
'query-frontend.downstream-tripper-config-file'
flag (mutually exclusive). Content of YAML file
that contains downstream tripper configuration.
If your downstream URL is localhost or
127.0.0.1 then it is highly recommended to
increase max_idle_conns_per_host to at least
100.
--query-frontend.downstream-tripper-config-file=<file-path>
Path to YAML file that contains downstream
tripper configuration. If your downstream URL
is localhost or 127.0.0.1 then it is highly
recommended to increase max_idle_conns_per_host
to at least 100.
--query-frontend.downstream-url="http://localhost:9090"
URL of downstream Prometheus Query compatible
API.
Expand Down
15 changes: 15 additions & 0 deletions pkg/queryfrontend/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"gopkg.in/yaml.v2"

extflag "github.com/efficientgo/tools/extkingpin"
prommodel "github.com/prometheus/common/model"
"github.com/thanos-io/thanos/pkg/cacheutil"
"github.com/thanos-io/thanos/pkg/model"
)
Expand Down Expand Up @@ -138,10 +139,24 @@ func NewCacheConfig(logger log.Logger, confContentYaml []byte) (*cortexcache.Con
}
}

// DownstreamTripperConfig stores the http.Transport configuration for query-frontend's HTTP downstream tripper.
//
// The tagged fields are the keys accepted in the downstream tripper YAML
// configuration. Each one overrides the corresponding http.Transport field
// only when it is set; unset fields leave the transport's default untouched.
type DownstreamTripperConfig struct {
// Timeouts. A zero value (key omitted) is treated as "not set" by the
// query-frontend command, which only applies values greater than zero.
IdleConnTimeout prommodel.Duration `yaml:"idle_conn_timeout"`
ResponseHeaderTimeout prommodel.Duration `yaml:"response_header_timeout"`
TLSHandshakeTimeout prommodel.Duration `yaml:"tls_handshake_timeout"`
ExpectContinueTimeout prommodel.Duration `yaml:"expect_continue_timeout"`
// Connection limits. These are pointers so that an omitted key (nil) can be
// told apart from an explicitly configured value, including 0.
MaxIdleConns *int `yaml:"max_idle_conns"`
MaxIdleConnsPerHost *int `yaml:"max_idle_conns_per_host"`
MaxConnsPerHost *int `yaml:"max_conns_per_host"`

// CachePathOrContent holds the flag-provided path or inline content from
// which the YAML configuration above is read.
// NOTE(review): this field carries no yaml tag, so it is not explicitly
// excluded from YAML decoding — confirm whether `yaml:"-"` is wanted.
CachePathOrContent extflag.PathOrContent
}

// Config holds the query frontend configs.
type Config struct {
QueryRangeConfig
LabelsConfig
DownstreamTripperConfig

CortexHandlerConfig *transport.HandlerConfig
CompressResponses bool
Expand Down

0 comments on commit d07f611

Please sign in to comment.