Skip to content

Commit b65bc6e

Browse files
committed
bugfixes and default config optimization
1 parent f9af430 commit b65bc6e

File tree

12 files changed

+257
-177
lines changed

12 files changed

+257
-177
lines changed

async_handoff_integration_test.go

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@ type mockNetConn struct {
1717
}
1818

1919
func (m *mockNetConn) Read(b []byte) (n int, err error) { return 0, nil }
20-
func (m *mockNetConn) Write(b []byte) (n int, err error) { return len(b), nil }
21-
func (m *mockNetConn) Close() error { return nil }
22-
func (m *mockNetConn) LocalAddr() net.Addr { return &mockAddr{m.addr} }
23-
func (m *mockNetConn) RemoteAddr() net.Addr { return &mockAddr{m.addr} }
24-
func (m *mockNetConn) SetDeadline(t time.Time) error { return nil }
25-
func (m *mockNetConn) SetReadDeadline(t time.Time) error { return nil }
20+
func (m *mockNetConn) Write(b []byte) (n int, err error) { return len(b), nil }
21+
func (m *mockNetConn) Close() error { return nil }
22+
func (m *mockNetConn) LocalAddr() net.Addr { return &mockAddr{m.addr} }
23+
func (m *mockNetConn) RemoteAddr() net.Addr { return &mockAddr{m.addr} }
24+
func (m *mockNetConn) SetDeadline(t time.Time) error { return nil }
25+
func (m *mockNetConn) SetReadDeadline(t time.Time) error { return nil }
2626
func (m *mockNetConn) SetWriteDeadline(t time.Time) error { return nil }
2727

2828
type mockAddr struct {
@@ -152,8 +152,8 @@ func TestEventDrivenHandoffIntegration(t *testing.T) {
152152
return &mockNetConn{addr: "original:6379"}, nil
153153
},
154154

155-
PoolSize: int32(10),
156-
PoolTimeout: time.Second,
155+
PoolSize: int32(10),
156+
PoolTimeout: time.Second,
157157
})
158158
defer testPool.Close()
159159

@@ -175,7 +175,7 @@ func TestEventDrivenHandoffIntegration(t *testing.T) {
175175
// Get connection
176176
conn, err := testPool.Get(ctx)
177177
if err != nil {
178-
t.Errorf("Failed to get connection %d: %v", id, err)
178+
t.Errorf("Failed to get conn[%d]: %v", id, err)
179179
return
180180
}
181181

@@ -224,8 +224,8 @@ func TestEventDrivenHandoffIntegration(t *testing.T) {
224224
return &mockNetConn{addr: "original:6379"}, nil
225225
},
226226

227-
PoolSize: int32(3),
228-
PoolTimeout: time.Second,
227+
PoolSize: int32(3),
228+
PoolTimeout: time.Second,
229229
})
230230
defer testPool.Close()
231231

@@ -287,8 +287,8 @@ func TestEventDrivenHandoffIntegration(t *testing.T) {
287287
return &mockNetConn{addr: "original:6379"}, nil
288288
},
289289

290-
PoolSize: int32(2),
291-
PoolTimeout: time.Second,
290+
PoolSize: int32(2),
291+
PoolTimeout: time.Second,
292292
})
293293
defer testPool.Close()
294294

hitless/README.md

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -39,27 +39,31 @@ Config: &hitless.Config{
3939
PostHandoffRelaxedDuration: 20 * time.Second, // Keep relaxed timeout after handoff
4040
LogLevel: logging.LogLevelWarn, // LogLevelError, LogLevelWarn, LogLevelInfo, LogLevelDebug
4141
MaxWorkers: 15, // Concurrent handoff workers
42-
HandoffQueueSize: 50, // Handoff request queue size
42+
HandoffQueueSize: 300, // Handoff request queue size
4343
}
4444
```
4545

4646
### Worker Scaling
47-
- **Auto-calculated**: `min(10, PoolSize/3)` - scales with pool size, capped at 10
48-
- **Explicit values**: `max(10, set_value)` - enforces minimum 10 workers
47+
- **Auto-calculated**: `min(PoolSize/2, max(10, PoolSize/3))` - balanced scaling approach
48+
- **Explicit values**: `max(PoolSize/2, set_value)` - enforces minimum PoolSize/2 workers
4949
- **On-demand**: Workers created when needed, cleaned up when idle
5050

5151
### Queue Sizing
52-
- **Auto-calculated**: `max(8 × MaxWorkers, max(50, PoolSize/2))` - hybrid scaling
53-
- Worker-based: 8 handoffs per worker for burst processing
54-
- Pool-based: Scales with pool size (minimum 50, up to PoolSize/2)
52+
- **Auto-calculated**: `max(20 × MaxWorkers, PoolSize)` - hybrid scaling
53+
- Worker-based: 20 handoffs per worker for burst processing
54+
- Pool-based: Scales directly with pool size
5555
- Takes the larger of the two for optimal performance
56-
- **Explicit values**: `max(50, set_value)` - enforces minimum 50 when set
57-
- **Always capped**: Queue size never exceeds `2 × PoolSize` for memory efficiency
56+
- **Explicit values**: `max(200, set_value)` - enforces minimum 200 when set
57+
- **Capping**: Queue size capped by `MaxActiveConns+1` (if set) or `5 × PoolSize` for memory efficiency
5858

59-
**Examples:**
60-
- Pool 10: Queue 50 (max(8×3, max(50, 5)) = max(24, 50) = 50)
61-
- Pool 100: Queue 80 (max(8×10, max(50, 50)) = max(80, 50) = 80)
62-
- Pool 200: Queue 100 (max(8×10, max(50, 100)) = max(80, 100) = 100)
59+
**Examples (without MaxActiveConns):**
60+
- Pool 10: Workers 5, Queue 100 (max(20×5, 10) = 100, capped at 5×10 = 50)
61+
- Pool 100: Workers 33, Queue 660 (max(20×33, 100) = 660, capped at 5×100 = 500)
62+
- Pool 200: Workers 66, Queue 1320 (max(20×66, 200) = 1320, capped at 5×200 = 1000)
63+
64+
**Examples (with MaxActiveConns=150):**
65+
- Pool 100: Workers 33, Queue 151 (max(20×33, 100) = 660, capped at MaxActiveConns+1 = 151)
66+
- Pool 200: Workers 66, Queue 151 (max(20×66, 200) = 1320, capped at MaxActiveConns+1 = 151)
6367

6468
## Notification Hooks
6569

@@ -94,7 +98,7 @@ type CustomHook struct{}
9498
func (h *CustomHook) PreHook(ctx context.Context, notificationCtx push.NotificationHandlerContext, notificationType string, notification []interface{}) ([]interface{}, bool) {
9599
// Log notification with connection details
96100
if conn, ok := notificationCtx.Conn.(*pool.Conn); ok {
97-
log.Printf("Processing %s on connection %d", notificationType, conn.GetID())
101+
log.Printf("Processing %s on conn[%d]", notificationType, conn.GetID())
98102
}
99103
return notification, true // Continue processing
100104
}

hitless/config.go

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -103,30 +103,25 @@ type Config struct {
103103

104104
// MaxWorkers is the maximum number of worker goroutines for processing handoff requests.
105105
// Workers are created on-demand and automatically cleaned up when idle.
106-
// If zero, defaults to min(10, PoolSize/3) to handle bursts effectively.
107-
// If explicitly set, enforces minimum of 10 workers.
106+
// If zero, defaults to min(10, PoolSize/2) to handle bursts effectively.
107+
// If explicitly set, enforces minimum of PoolSize/2
108108
//
109-
// Default: min(10, PoolSize/3), Minimum when set: 10
109+
// Default: min(PoolSize/2, max(10, PoolSize/3)), Minimum when set: PoolSize/2
110110
MaxWorkers int
111111

112112
// HandoffQueueSize is the size of the buffered channel used to queue handoff requests.
113113
// If the queue is full, new handoff requests will be rejected.
114114
// Scales with both worker count and pool size for better burst handling.
115115
//
116-
// Default: max(8x workers, max(50, PoolSize/2)), capped by 2x pool size
117-
// When set: min 50, capped by 2x pool size
116+
// Default: max(20×MaxWorkers, PoolSize), capped by MaxActiveConns+1 (if set) or 5×PoolSize
117+
// When set: minimum 200, capped by MaxActiveConns+1 (if set) or 5×PoolSize
118118
HandoffQueueSize int
119119

120120
// PostHandoffRelaxedDuration is how long to keep relaxed timeouts on the new connection
121121
// after a handoff completes. This provides additional resilience during cluster transitions.
122122
// Default: 2 * RelaxedTimeout
123123
PostHandoffRelaxedDuration time.Duration
124124

125-
// ScaleDownDelay is the delay before checking if workers should be scaled down.
126-
// This prevents expensive checks on every handoff completion and avoids rapid scaling cycles.
127-
// Default: 2 seconds
128-
ScaleDownDelay time.Duration
129-
130125
// LogLevel controls the verbosity of hitless upgrade logging.
131126
// LogLevelError (0) = errors only, LogLevelWarn (1) = warnings, LogLevelInfo (2) = info, LogLevelDebug (3) = debug
132127
// Default: LogLevelWarn (warnings)
@@ -152,7 +147,6 @@ func DefaultConfig() *Config {
152147
MaxWorkers: 0, // Auto-calculated based on pool size
153148
HandoffQueueSize: 0, // Auto-calculated based on max workers
154149
PostHandoffRelaxedDuration: 0, // Auto-calculated based on relaxed timeout
155-
ScaleDownDelay: 2 * time.Second,
156150
LogLevel: LogLevelWarn,
157151

158152
// Connection Handoff Configuration
@@ -212,6 +206,13 @@ func (c *Config) ApplyDefaults() *Config {
212206
// using the provided pool size to calculate worker defaults.
213207
// This ensures that partially configured structs get sensible defaults for missing fields.
214208
func (c *Config) ApplyDefaultsWithPoolSize(poolSize int) *Config {
209+
return c.ApplyDefaultsWithPoolConfig(poolSize, 0)
210+
}
211+
212+
// ApplyDefaultsWithPoolConfig applies default values to any zero-value fields in the configuration,
213+
// using the provided pool size and max active connections to calculate worker and queue defaults.
214+
// This ensures that partially configured structs get sensible defaults for missing fields.
215+
func (c *Config) ApplyDefaultsWithPoolConfig(poolSize int, maxActiveConns int) *Config {
215216
if c == nil {
216217
return DefaultConfig().ApplyDefaultsWithPoolSize(poolSize)
217218
}
@@ -251,19 +252,25 @@ func (c *Config) ApplyDefaultsWithPoolSize(poolSize int) *Config {
251252
// Apply worker defaults based on pool size
252253
result.applyWorkerDefaults(poolSize)
253254

254-
// Apply queue size defaults with hybrid scaling approach
255+
// Apply queue size defaults with new scaling approach
255256
if c.HandoffQueueSize <= 0 {
256-
// Default: max(8x workers, max(50, PoolSize/2)), capped by 2x pool size
257-
workerBasedSize := result.MaxWorkers * 8
258-
poolBasedSize := util.Max(50, poolSize/2)
257+
// Default: max(20x workers, PoolSize), capped by maxActiveConns or 5x pool size
258+
workerBasedSize := result.MaxWorkers * 20
259+
poolBasedSize := poolSize
259260
result.HandoffQueueSize = util.Max(workerBasedSize, poolBasedSize)
260261
} else {
261-
// When explicitly set: enforce minimum of 50
262-
result.HandoffQueueSize = util.Max(50, c.HandoffQueueSize)
262+
// When explicitly set: enforce minimum of 200
263+
result.HandoffQueueSize = util.Max(200, c.HandoffQueueSize)
263264
}
264265

265-
// Always cap queue size by 2x pool size - balances burst capacity with memory efficiency
266-
result.HandoffQueueSize = util.Min(result.HandoffQueueSize, poolSize*2)
266+
// Cap queue size: use maxActiveConns+1 if set, otherwise 5x pool size
267+
var queueCap int
268+
if maxActiveConns > 0 {
269+
queueCap = maxActiveConns + 1
270+
} else {
271+
queueCap = poolSize * 5
272+
}
273+
result.HandoffQueueSize = util.Min(result.HandoffQueueSize, queueCap)
267274

268275
// Ensure minimum queue size of 2 (fallback for very small pools)
269276
if result.HandoffQueueSize < 2 {
@@ -276,12 +283,6 @@ func (c *Config) ApplyDefaultsWithPoolSize(poolSize int) *Config {
276283
result.PostHandoffRelaxedDuration = c.PostHandoffRelaxedDuration
277284
}
278285

279-
if c.ScaleDownDelay <= 0 {
280-
result.ScaleDownDelay = defaults.ScaleDownDelay
281-
} else {
282-
result.ScaleDownDelay = c.ScaleDownDelay
283-
}
284-
285286
// LogLevel: 0 is a valid value (errors only), so we need to check if it was explicitly set
286287
// We'll use the provided value as-is, since 0 is valid
287288
result.LogLevel = c.LogLevel
@@ -314,7 +315,6 @@ func (c *Config) Clone() *Config {
314315
MaxWorkers: c.MaxWorkers,
315316
HandoffQueueSize: c.HandoffQueueSize,
316317
PostHandoffRelaxedDuration: c.PostHandoffRelaxedDuration,
317-
ScaleDownDelay: c.ScaleDownDelay,
318318
LogLevel: c.LogLevel,
319319

320320
// Configuration fields
@@ -330,11 +330,11 @@ func (c *Config) applyWorkerDefaults(poolSize int) {
330330
}
331331

332332
if c.MaxWorkers == 0 {
333-
// When not set: min(10, poolSize/3) - don't exceed 10 workers for small pools
334-
c.MaxWorkers = util.Min(10, poolSize/3)
333+
// When not set: min(poolSize/2, max(10, poolSize/3)) - balanced scaling approach
334+
c.MaxWorkers = util.Min(poolSize/2, util.Max(10, poolSize/3))
335335
} else {
336-
// When explicitly set: max(10, set_value) - ensure at least 10 workers
337-
c.MaxWorkers = util.Max(10, c.MaxWorkers)
336+
// When explicitly set: max(poolSize/2, set_value) - ensure at least poolSize/2 workers
337+
c.MaxWorkers = util.Max(poolSize/2, c.MaxWorkers)
338338
}
339339

340340
// Ensure minimum of 1 worker (fallback for very small pools)

hitless/errors.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,8 @@ var (
4343
// ErrConnectionInvalidHandoffState is returned when a connection is in an invalid state for handoff
4444
ErrConnectionInvalidHandoffState = errors.New("hitless: connection is in invalid state for handoff")
4545
)
46+
47+
// general errors
48+
var (
49+
ErrShutdown = errors.New("hitless: shutdown")
50+
)

hitless/example_hooks.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ func (mh *MetricsHook) PreHook(ctx context.Context, notificationCtx push.Notific
3838

3939
// Log connection information if available
4040
if conn, ok := notificationCtx.Conn.(*pool.Conn); ok {
41-
internal.Logger.Printf(ctx, "hitless: metrics hook processing %s notification on connection %d", notificationType, conn.GetID())
41+
internal.Logger.Printf(ctx, "hitless: metrics hook processing %s notification on conn[%d]", notificationType, conn.GetID())
4242
}
4343

4444
// Store start time in context for duration calculation
@@ -62,7 +62,7 @@ func (mh *MetricsHook) PostHook(ctx context.Context, notificationCtx push.Notifi
6262

6363
// Log error details with connection information
6464
if conn, ok := notificationCtx.Conn.(*pool.Conn); ok {
65-
internal.Logger.Printf(ctx, "hitless: metrics hook recorded error for %s notification on connection %d: %v", notificationType, conn.GetID(), result)
65+
internal.Logger.Printf(ctx, "hitless: metrics hook recorded error for %s notification on conn[%d]: %v", notificationType, conn.GetID(), result)
6666
}
6767
}
6868
}

0 commit comments

Comments
 (0)