Skip to content

Commit 0dff3d1

Browse files
committed
feat: add OpenTelemetry metrics instrumentation
1 parent 41a533c commit 0dff3d1

25 files changed

+1134
-719
lines changed

CHANGELOG.md

Lines changed: 30 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,70 +1,34 @@
1-
# [9.0.0-beta.2](https://github.com/go-redis/redis/compare/v9.0.0-beta.1...v9.0.0-beta.2) (2022-07-28)
2-
3-
4-
### Bug Fixes
5-
6-
* [#2114](https://github.com/go-redis/redis/issues/2114) for redis-server not support Hello ([b6d2a92](https://github.com/go-redis/redis/commit/b6d2a925297e3e516eb5c76c114c1c9fcd5b68c5))
7-
* additional node failures in clustered pipelined reads ([03376a5](https://github.com/go-redis/redis/commit/03376a5d9c7dfd7197b14ce13b24a0431a07a663))
8-
* disregard failed pings in updateLatency() for cluster nodes ([64f972f](https://github.com/go-redis/redis/commit/64f972fbeae401e52a2c066a0e1c922af617e15c))
9-
* don't panic when test cannot start ([9e16c79](https://github.com/go-redis/redis/commit/9e16c79951e7769621b7320f1ecdf04baf539b82))
10-
* handle panic in ringShards Hash function when Ring got closed ([a80b84f](https://github.com/go-redis/redis/commit/a80b84f01f9fc0d3e6f08445ba21f7e07880775e)), closes [#2126](https://github.com/go-redis/redis/issues/2126)
11-
* ignore Nil error when reading EntriesRead ([89d6dfe](https://github.com/go-redis/redis/commit/89d6dfe09a88321d445858c1c5b24d2757b95a3e))
12-
* log errors from cmdsInfoCache ([fa4d1ea](https://github.com/go-redis/redis/commit/fa4d1ea8398cd729ad5cbaaff88e4b8805393945))
13-
* provide a signal channel to end heartbeat goroutine ([f032c12](https://github.com/go-redis/redis/commit/f032c126db3e2c1a239ce1790b0ab81994df75cf))
14-
* remove conn reaper from the pool and uptrace option names ([f6a8adc](https://github.com/go-redis/redis/commit/f6a8adc50cdaec30527f50d06468f9176ee674fe))
15-
* replace heartbeat signal channel with context.WithCancel ([20d0ca2](https://github.com/go-redis/redis/commit/20d0ca235efff48ad48cc05b98790b825d4ba979))
16-
17-
18-
19-
# [9.0.0-beta.1](https://github.com/go-redis/redis/compare/v8.11.5...v9.0.0-beta.1) (2022-06-04)
20-
21-
### Bug Fixes
22-
23-
- **#1943:** xInfoConsumer.Idle should be time.Duration instead of int64
24-
([#2052](https://github.com/go-redis/redis/issues/2052))
25-
([997ab5e](https://github.com/go-redis/redis/commit/997ab5e7e3ddf53837917013a4babbded73e944f)),
26-
closes [#1943](https://github.com/go-redis/redis/issues/1943)
27-
- add XInfoConsumers test
28-
([6f1a1ac](https://github.com/go-redis/redis/commit/6f1a1ac284ea3f683eeb3b06a59969e8424b6376))
29-
- fix tests
30-
([3a722be](https://github.com/go-redis/redis/commit/3a722be81180e4d2a9cf0a29dc9a1ee1421f5859))
31-
- remove test(XInfoConsumer.idle), not a stable return value when tested.
32-
([f5fbb36](https://github.com/go-redis/redis/commit/f5fbb367e7d9dfd7f391fc535a7387002232fa8a))
33-
- update ChannelWithSubscriptions to accept options
34-
([c98c5f0](https://github.com/go-redis/redis/commit/c98c5f0eebf8d254307183c2ce702a48256b718d))
35-
- update COMMAND parser for Redis 7
36-
([b0bb514](https://github.com/go-redis/redis/commit/b0bb514059249e01ed7328c9094e5b8a439dfb12))
37-
- use redis over ssh channel([#2057](https://github.com/go-redis/redis/issues/2057))
38-
([#2060](https://github.com/go-redis/redis/issues/2060))
39-
([3961b95](https://github.com/go-redis/redis/commit/3961b9577f622a3079fe74f8fc8da12ba67a77ff))
40-
41-
### Features
42-
43-
- add ClientUnpause
44-
([91171f5](https://github.com/go-redis/redis/commit/91171f5e19a261dc4cfbf8706626d461b6ba03e4))
45-
- add NewXPendingResult for unit testing XPending
46-
([#2066](https://github.com/go-redis/redis/issues/2066))
47-
([b7fd09e](https://github.com/go-redis/redis/commit/b7fd09e59479bc6ed5b3b13c4645a3620fd448a3))
48-
- add WriteArg and Scan net.IP([#2062](https://github.com/go-redis/redis/issues/2062))
49-
([7d5167e](https://github.com/go-redis/redis/commit/7d5167e8624ac1515e146ed183becb97dadb3d1a))
50-
- **pool:** add check for badConnection
51-
([a8a7665](https://github.com/go-redis/redis/commit/a8a7665ddf8cc657c5226b1826a8ee83dab4b8c1)),
52-
closes [#2053](https://github.com/go-redis/redis/issues/2053)
53-
- provide a username and password callback method, so that the plaintext username and password will
54-
not be stored in the memory, and the username and password will only be generated once when the
55-
CredentialsProvider is called. After the method is executed, the username and password strings on
56-
the stack will be released. ([#2097](https://github.com/go-redis/redis/issues/2097))
57-
([56a3dbc](https://github.com/go-redis/redis/commit/56a3dbc7b656525eb88e0735e239d56e04a23bee))
58-
- upgrade to Redis 7
59-
([d09c27e](https://github.com/go-redis/redis/commit/d09c27e6046129fd27b1d275e5a13a477bd7f778))
60-
611
## v9 UNRELEASED
622

3+
### Added
4+
635
- Added support for [RESP3](https://github.com/antirez/RESP3/blob/master/spec.md) protocol.
64-
- Removed `Pipeline.Close` since there is no real need to explicitly manage pipeline resources.
65-
`Pipeline.Discard` is still available if you want to reset commands for some reason.
66-
- Replaced `*redis.Z` with `redis.Z` since it is small enough to be passed as value.
67-
- Renamed `MaxConnAge` to `ConnMaxLifetime`.
68-
- Renamed `IdleTimeout` to `ConnMaxIdleTime`.
6+
Contributed by @monkey92t who has done a lot of work recently.
7+
- Added `ContextTimeoutEnabled` option that controls whether the client respects context timeouts
8+
and deadlines. See
9+
[Redis Timeouts](https://redis.uptrace.dev/guide/go-redis-debugging.html#timeouts) for details.
10+
- Added `ParseClusterURL` to parse URLs into `ClusterOptions`, for example,
11+
`redis://user:password@localhost:6789?dial_timeout=3&read_timeout=6s&addr=localhost:6790&addr=localhost:6791`.
12+
- Added metrics instrumentation using `redisotel.InstrumentMetrics`. See
13+
[documentation](https://redis.uptrace.dev/guide/go-redis-monitoring.html).
14+
15+
### Changed
16+
17+
- Reworked hook interface and added `DialHook`.
18+
- Replaced `redisotel.NewTracingHook` with `redisotel.InstrumentTracing`. See
19+
[example](example/otel) and
20+
[documentation](https://redis.uptrace.dev/guide/go-redis-monitoring.html).
21+
- Replaced `*redis.Z` with `redis.Z` since it is small enough to be passed as value without making
22+
an allocation.
23+
- Renamed the option `MaxConnAge` to `ConnMaxLifetime`.
24+
- Renamed the option `IdleTimeout` to `ConnMaxIdleTime`.
6925
- Removed connection reaper in favor of `MaxIdleConns`.
70-
- Removed `WithContext`.
26+
- Removed `WithContext` since `context.Context` can be passed directly as an arg.
27+
- Removed `Pipeline.Close` since there is no real need to explicitly manage pipeline resources and
28+
it can be safely reused via `sync.Pool` etc. `Pipeline.Discard` is still available if you want to
29+
reset commands for some reason.
30+
31+
### Fixed
32+
33+
- Improved and fixed pipeline retries.
34+
- As usual, added more commands and fixed some bugs.

cluster.go

Lines changed: 64 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,8 @@ type ClusterOptions struct {
7171
WriteTimeout time.Duration
7272
ContextTimeoutEnabled bool
7373

74-
// PoolFIFO uses FIFO mode for each node connection pool GET/PUT (default LIFO).
75-
PoolFIFO bool
76-
77-
// PoolSize applies per cluster node and not for the whole cluster.
78-
PoolSize int
74+
PoolFIFO bool
75+
PoolSize int // applies per cluster node and not for the whole cluster
7976
PoolTimeout time.Duration
8077
MinIdleConns int
8178
MaxIdleConns int
@@ -391,6 +388,7 @@ type clusterNodes struct {
391388
nodes map[string]*clusterNode
392389
activeAddrs []string
393390
closed bool
391+
onNewNode []func(rdb *Client)
394392

395393
_generation uint32 // atomic
396394
}
@@ -426,6 +424,12 @@ func (c *clusterNodes) Close() error {
426424
return firstErr
427425
}
428426

427+
func (c *clusterNodes) OnNewNode(fn func(rdb *Client)) {
428+
c.mu.Lock()
429+
c.onNewNode = append(c.onNewNode, fn)
430+
c.mu.Unlock()
431+
}
432+
429433
func (c *clusterNodes) Addrs() ([]string, error) {
430434
var addrs []string
431435

@@ -503,6 +507,9 @@ func (c *clusterNodes) GetOrCreate(addr string) (*clusterNode, error) {
503507
}
504508

505509
node = newClusterNode(c.opt, addr)
510+
for _, fn := range c.onNewNode {
511+
fn(node.Client)
512+
}
506513

507514
c.addrs = appendIfNotExists(c.addrs, addr)
508515
c.nodes[addr] = node
@@ -812,18 +819,14 @@ func (c *clusterStateHolder) ReloadOrGet(ctx context.Context) (*clusterState, er
812819

813820
//------------------------------------------------------------------------------
814821

815-
type clusterClient struct {
816-
opt *ClusterOptions
817-
nodes *clusterNodes
818-
state *clusterStateHolder //nolint:structcheck
819-
cmdsInfoCache *cmdsInfoCache //nolint:structcheck
820-
}
821-
822822
// ClusterClient is a Redis Cluster client representing a pool of zero
823823
// or more underlying connections. It's safe for concurrent use by
824824
// multiple goroutines.
825825
type ClusterClient struct {
826-
*clusterClient
826+
opt *ClusterOptions
827+
nodes *clusterNodes
828+
state *clusterStateHolder
829+
cmdsInfoCache *cmdsInfoCache
827830
cmdable
828831
hooks
829832
}
@@ -834,15 +837,18 @@ func NewClusterClient(opt *ClusterOptions) *ClusterClient {
834837
opt.init()
835838

836839
c := &ClusterClient{
837-
clusterClient: &clusterClient{
838-
opt: opt,
839-
nodes: newClusterNodes(opt),
840-
},
840+
opt: opt,
841+
nodes: newClusterNodes(opt),
841842
}
843+
842844
c.state = newClusterStateHolder(c.loadState)
843845
c.cmdsInfoCache = newCmdsInfoCache(c.cmdsInfo)
844846
c.cmdable = c.Process
845847

848+
c.hooks.process = c.process
849+
c.hooks.processPipeline = c._processPipeline
850+
c.hooks.processTxPipeline = c._processTxPipeline
851+
846852
return c
847853
}
848854

@@ -873,13 +879,14 @@ func (c *ClusterClient) Do(ctx context.Context, args ...interface{}) *Cmd {
873879
}
874880

875881
func (c *ClusterClient) Process(ctx context.Context, cmd Cmder) error {
876-
return c.hooks.process(ctx, cmd, c.process)
882+
err := c.hooks.process(ctx, cmd)
883+
cmd.SetErr(err)
884+
return err
877885
}
878886

879887
func (c *ClusterClient) process(ctx context.Context, cmd Cmder) error {
880888
cmdInfo := c.cmdInfo(ctx, cmd.Name())
881889
slot := c.cmdSlot(ctx, cmd)
882-
883890
var node *clusterNode
884891
var ask bool
885892
var lastErr error
@@ -899,11 +906,12 @@ func (c *ClusterClient) process(ctx context.Context, cmd Cmder) error {
899906
}
900907

901908
if ask {
909+
ask = false
910+
902911
pipe := node.Client.Pipeline()
903912
_ = pipe.Process(ctx, NewCmd(ctx, "asking"))
904913
_ = pipe.Process(ctx, cmd)
905914
_, lastErr = pipe.Exec(ctx)
906-
ask = false
907915
} else {
908916
lastErr = node.Client.Process(ctx, cmd)
909917
}
@@ -958,6 +966,10 @@ func (c *ClusterClient) process(ctx context.Context, cmd Cmder) error {
958966
return lastErr
959967
}
960968

969+
func (c *ClusterClient) OnNewNode(fn func(rdb *Client)) {
970+
c.nodes.OnNewNode(fn)
971+
}
972+
961973
// ForEachMaster concurrently calls the fn on each master node in the cluster.
962974
// It returns the first error if any.
963975
func (c *ClusterClient) ForEachMaster(
@@ -1165,7 +1177,7 @@ func (c *ClusterClient) loadState(ctx context.Context) (*clusterState, error) {
11651177

11661178
func (c *ClusterClient) Pipeline() Pipeliner {
11671179
pipe := Pipeline{
1168-
exec: c.processPipeline,
1180+
exec: pipelineExecer(c.hooks.processPipeline),
11691181
}
11701182
pipe.init()
11711183
return &pipe
@@ -1175,10 +1187,6 @@ func (c *ClusterClient) Pipelined(ctx context.Context, fn func(Pipeliner) error)
11751187
return c.Pipeline().Pipelined(ctx, fn)
11761188
}
11771189

1178-
func (c *ClusterClient) processPipeline(ctx context.Context, cmds []Cmder) error {
1179-
return c.hooks.processPipeline(ctx, cmds, c._processPipeline)
1180-
}
1181-
11821190
func (c *ClusterClient) _processPipeline(ctx context.Context, cmds []Cmder) error {
11831191
cmdsMap := newCmdsMap()
11841192

@@ -1258,7 +1266,7 @@ func (c *ClusterClient) cmdsAreReadOnly(ctx context.Context, cmds []Cmder) bool
12581266
func (c *ClusterClient) _processPipelineNode(
12591267
ctx context.Context, node *clusterNode, cmds []Cmder, failedCmds *cmdsMap,
12601268
) {
1261-
_ = node.Client.hooks.processPipeline(ctx, cmds, func(ctx context.Context, cmds []Cmder) error {
1269+
_ = node.Client.hooks.withProcessPipelineHook(ctx, cmds, func(ctx context.Context, cmds []Cmder) error {
12621270
return node.Client.withConn(ctx, func(ctx context.Context, cn *pool.Conn) error {
12631271
if err := cn.WithWriter(c.context(ctx), c.opt.WriteTimeout, func(wr *proto.Writer) error {
12641272
return writeCmds(wr, cmds)
@@ -1344,7 +1352,10 @@ func (c *ClusterClient) checkMovedErr(
13441352
// TxPipeline acts like Pipeline, but wraps queued commands with MULTI/EXEC.
13451353
func (c *ClusterClient) TxPipeline() Pipeliner {
13461354
pipe := Pipeline{
1347-
exec: c.processTxPipeline,
1355+
exec: func(ctx context.Context, cmds []Cmder) error {
1356+
cmds = wrapMultiExec(ctx, cmds)
1357+
return c.hooks.processTxPipeline(ctx, cmds)
1358+
},
13481359
}
13491360
pipe.init()
13501361
return &pipe
@@ -1354,10 +1365,6 @@ func (c *ClusterClient) TxPipelined(ctx context.Context, fn func(Pipeliner) erro
13541365
return c.TxPipeline().Pipelined(ctx, fn)
13551366
}
13561367

1357-
func (c *ClusterClient) processTxPipeline(ctx context.Context, cmds []Cmder) error {
1358-
return c.hooks.processTxPipeline(ctx, cmds, c._processTxPipeline)
1359-
}
1360-
13611368
func (c *ClusterClient) _processTxPipeline(ctx context.Context, cmds []Cmder) error {
13621369
// Trim multi .. exec.
13631370
cmds = cmds[1 : len(cmds)-1]
@@ -1419,38 +1426,38 @@ func (c *ClusterClient) mapCmdsBySlot(ctx context.Context, cmds []Cmder) map[int
14191426
func (c *ClusterClient) _processTxPipelineNode(
14201427
ctx context.Context, node *clusterNode, cmds []Cmder, failedCmds *cmdsMap,
14211428
) {
1422-
_ = node.Client.hooks.processTxPipeline(
1423-
ctx, cmds, func(ctx context.Context, cmds []Cmder) error {
1424-
return node.Client.withConn(ctx, func(ctx context.Context, cn *pool.Conn) error {
1425-
if err := cn.WithWriter(c.context(ctx), c.opt.WriteTimeout, func(wr *proto.Writer) error {
1426-
return writeCmds(wr, cmds)
1427-
}); err != nil {
1428-
setCmdsErr(cmds, err)
1429-
return err
1430-
}
1431-
1432-
return cn.WithReader(c.context(ctx), c.opt.ReadTimeout, func(rd *proto.Reader) error {
1433-
statusCmd := cmds[0].(*StatusCmd)
1434-
// Trim multi and exec.
1435-
trimmedCmds := cmds[1 : len(cmds)-1]
1429+
cmds = wrapMultiExec(ctx, cmds)
1430+
_ = node.Client.hooks.withProcessPipelineHook(ctx, cmds, func(ctx context.Context, cmds []Cmder) error {
1431+
return node.Client.withConn(ctx, func(ctx context.Context, cn *pool.Conn) error {
1432+
if err := cn.WithWriter(c.context(ctx), c.opt.WriteTimeout, func(wr *proto.Writer) error {
1433+
return writeCmds(wr, cmds)
1434+
}); err != nil {
1435+
setCmdsErr(cmds, err)
1436+
return err
1437+
}
14361438

1437-
if err := c.txPipelineReadQueued(
1438-
ctx, rd, statusCmd, trimmedCmds, failedCmds,
1439-
); err != nil {
1440-
setCmdsErr(cmds, err)
1439+
return cn.WithReader(c.context(ctx), c.opt.ReadTimeout, func(rd *proto.Reader) error {
1440+
statusCmd := cmds[0].(*StatusCmd)
1441+
// Trim multi and exec.
1442+
trimmedCmds := cmds[1 : len(cmds)-1]
14411443

1442-
moved, ask, addr := isMovedError(err)
1443-
if moved || ask {
1444-
return c.cmdsMoved(ctx, trimmedCmds, moved, ask, addr, failedCmds)
1445-
}
1444+
if err := c.txPipelineReadQueued(
1445+
ctx, rd, statusCmd, trimmedCmds, failedCmds,
1446+
); err != nil {
1447+
setCmdsErr(cmds, err)
14461448

1447-
return err
1449+
moved, ask, addr := isMovedError(err)
1450+
if moved || ask {
1451+
return c.cmdsMoved(ctx, trimmedCmds, moved, ask, addr, failedCmds)
14481452
}
14491453

1450-
return pipelineReadCmds(rd, trimmedCmds)
1451-
})
1454+
return err
1455+
}
1456+
1457+
return pipelineReadCmds(rd, trimmedCmds)
14521458
})
14531459
})
1460+
})
14541461
}
14551462

14561463
func (c *ClusterClient) txPipelineReadQueued(
@@ -1742,7 +1749,7 @@ func (c *ClusterClient) cmdNode(
17421749
return state.slotMasterNode(slot)
17431750
}
17441751

1745-
func (c *clusterClient) slotReadOnlyNode(state *clusterState, slot int) (*clusterNode, error) {
1752+
func (c *ClusterClient) slotReadOnlyNode(state *clusterState, slot int) (*clusterNode, error) {
17461753
if c.opt.RouteByLatency {
17471754
return state.slotClosestNode(slot)
17481755
}

cluster_commands.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import (
88

99
func (c *ClusterClient) DBSize(ctx context.Context) *IntCmd {
1010
cmd := NewIntCmd(ctx, "dbsize")
11-
_ = c.hooks.process(ctx, cmd, func(ctx context.Context, _ Cmder) error {
11+
_ = c.hooks.withProcessHook(ctx, cmd, func(ctx context.Context, _ Cmder) error {
1212
var size int64
1313
err := c.ForEachMaster(ctx, func(ctx context.Context, master *Client) error {
1414
n, err := master.DBSize(ctx).Result()
@@ -30,7 +30,7 @@ func (c *ClusterClient) DBSize(ctx context.Context) *IntCmd {
3030

3131
func (c *ClusterClient) ScriptLoad(ctx context.Context, script string) *StringCmd {
3232
cmd := NewStringCmd(ctx, "script", "load", script)
33-
_ = c.hooks.process(ctx, cmd, func(ctx context.Context, _ Cmder) error {
33+
_ = c.hooks.withProcessHook(ctx, cmd, func(ctx context.Context, _ Cmder) error {
3434
mu := &sync.Mutex{}
3535
err := c.ForEachShard(ctx, func(ctx context.Context, shard *Client) error {
3636
val, err := shard.ScriptLoad(ctx, script).Result()
@@ -56,7 +56,7 @@ func (c *ClusterClient) ScriptLoad(ctx context.Context, script string) *StringCm
5656

5757
func (c *ClusterClient) ScriptFlush(ctx context.Context) *StatusCmd {
5858
cmd := NewStatusCmd(ctx, "script", "flush")
59-
_ = c.hooks.process(ctx, cmd, func(ctx context.Context, _ Cmder) error {
59+
_ = c.hooks.withProcessHook(ctx, cmd, func(ctx context.Context, _ Cmder) error {
6060
err := c.ForEachShard(ctx, func(ctx context.Context, shard *Client) error {
6161
return shard.ScriptFlush(ctx).Err()
6262
})
@@ -82,8 +82,8 @@ func (c *ClusterClient) ScriptExists(ctx context.Context, hashes ...string) *Boo
8282
result[i] = true
8383
}
8484

85-
_ = c.hooks.process(ctx, cmd, func(ctx context.Context, _ Cmder) error {
86-
mu := &sync.Mutex{}
85+
_ = c.hooks.withProcessHook(ctx, cmd, func(ctx context.Context, _ Cmder) error {
86+
var mu sync.Mutex
8787
err := c.ForEachShard(ctx, func(ctx context.Context, shard *Client) error {
8888
val, err := shard.ScriptExists(ctx, hashes...).Result()
8989
if err != nil {

0 commit comments

Comments
 (0)