Skip to content

Commit a36547b

Browse files
suggestion: simplify exponential backoff (#1970)
* Removes backoff. * Adjusts comment. * Applies suggestions by @dunglas. * Removes 'max_consecutive_failures'. * Removes 'max_consecutive_failures'. * Adjusts warning. * Disables the logger in tests. * Revert "Adjusts warning." (reverts commit e93a6a9). * Revert "Removes 'max_consecutive_failures'" (reverts commit ba28ea0). * Revert "Removes 'max_consecutive_failures'" (reverts commit 32e649c). * Only fails on max failures again. * Restores failure timings.
1 parent 4161623 commit a36547b

File tree

7 files changed

+57
-149
lines changed

7 files changed

+57
-149
lines changed

frankenphp_test.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -601,10 +601,12 @@ func testRequestHeaders(t *testing.T, opts *testOptions) {
601601
}
602602

603603
func TestFailingWorker(t *testing.T) {
604-
runTest(t, func(handler func(http.ResponseWriter, *http.Request), _ *httptest.Server, i int) {
605-
body, _ := testGet("http://example.com/failing-worker.php", handler, t)
606-
assert.Contains(t, body, "ok")
607-
}, &testOptions{workerScript: "failing-worker.php"})
604+
err := frankenphp.Init(
605+
frankenphp.WithLogger(slog.New(slog.NewTextHandler(io.Discard, nil))),
606+
frankenphp.WithWorkers("failing worker", "testdata/failing-worker.php", 4, frankenphp.WithWorkerMaxFailures(1)),
607+
frankenphp.WithNumThreads(5),
608+
)
609+
assert.Error(t, err, "should return an immediate error if workers fail on startup")
608610
}
609611

610612
func TestEnv(t *testing.T) {

internal/backoff/backoff.go

Lines changed: 0 additions & 59 deletions
This file was deleted.

internal/backoff/backoff_test.go

Lines changed: 0 additions & 41 deletions
This file was deleted.

phpmainthread_test.go

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -175,19 +175,19 @@ func TestFinishBootingAWorkerScript(t *testing.T) {
175175

176176
func TestReturnAnErrorIf2WorkersHaveTheSameFileName(t *testing.T) {
177177
workers = []*worker{}
178-
w, err1 := newWorker(workerOpt{fileName: "filename.php", maxConsecutiveFailures: defaultMaxConsecutiveFailures})
178+
w, err1 := newWorker(workerOpt{fileName: "filename.php"})
179179
workers = append(workers, w)
180-
_, err2 := newWorker(workerOpt{fileName: "filename.php", maxConsecutiveFailures: defaultMaxConsecutiveFailures})
180+
_, err2 := newWorker(workerOpt{fileName: "filename.php"})
181181

182182
assert.NoError(t, err1)
183183
assert.Error(t, err2, "two workers cannot have the same filename")
184184
}
185185

186186
func TestReturnAnErrorIf2ModuleWorkersHaveTheSameName(t *testing.T) {
187187
workers = []*worker{}
188-
w, err1 := newWorker(workerOpt{fileName: "filename.php", name: "workername", maxConsecutiveFailures: defaultMaxConsecutiveFailures})
188+
w, err1 := newWorker(workerOpt{fileName: "filename.php", name: "workername"})
189189
workers = append(workers, w)
190-
_, err2 := newWorker(workerOpt{fileName: "filename2.php", name: "workername", maxConsecutiveFailures: defaultMaxConsecutiveFailures})
190+
_, err2 := newWorker(workerOpt{fileName: "filename2.php", name: "workername"})
191191

192192
assert.NoError(t, err1)
193193
assert.Error(t, err2, "two workers cannot have the same name")
@@ -198,9 +198,8 @@ func getDummyWorker(fileName string) *worker {
198198
workers = []*worker{}
199199
}
200200
worker, _ := newWorker(workerOpt{
201-
fileName: testDataPath + "/" + fileName,
202-
num: 1,
203-
maxConsecutiveFailures: defaultMaxConsecutiveFailures,
201+
fileName: testDataPath + "/" + fileName,
202+
num: 1,
204203
})
205204
workers = append(workers, worker)
206205
return worker

testdata/failing-worker.php

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,7 @@
11
<?php
22

3-
$fail = random_int(1, 100) < 10;
4-
$wait = random_int(1000 * 100, 1000 * 500); // wait 100ms - 500ms
5-
6-
usleep($wait);
7-
if ($fail) {
8-
exit(1);
3+
if (rand(1, 100) <= 50) {
4+
throw new Exception('this exception is expected to fail the worker');
95
}
106

11-
while (frankenphp_handle_request(function () {
12-
echo "ok";
13-
})) {
14-
$fail = random_int(1, 100) < 10;
15-
if ($fail) {
16-
exit(1);
17-
}
18-
}
7+
// frankenphp_handle_request() has not been reached (also a failure)

threadworker.go

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ import (
1010
"time"
1111
"unsafe"
1212

13-
"github.com/dunglas/frankenphp/internal/backoff"
1413
"github.com/dunglas/frankenphp/internal/state"
1514
)
1615

@@ -23,20 +22,15 @@ type workerThread struct {
2322
worker *worker
2423
dummyContext *frankenPHPContext
2524
workerContext *frankenPHPContext
26-
backoff *backoff.ExponentialBackoff
2725
isBootingScript bool // true if the worker has not reached frankenphp_handle_request yet
26+
failureCount int // number of consecutive startup failures
2827
}
2928

3029
func convertToWorkerThread(thread *phpThread, worker *worker) {
3130
thread.setHandler(&workerThread{
3231
state: thread.state,
3332
thread: thread,
3433
worker: worker,
35-
backoff: &backoff.ExponentialBackoff{
36-
MaxBackoff: 1 * time.Second,
37-
MinBackoff: 100 * time.Millisecond,
38-
MaxConsecutiveFailures: worker.maxConsecutiveFailures,
39-
},
4034
})
4135
worker.attachThread(thread)
4236
}
@@ -92,7 +86,6 @@ func (handler *workerThread) name() string {
9286
}
9387

9488
func setupWorkerScript(handler *workerThread, worker *worker) {
95-
handler.backoff.Wait()
9689
metrics.StartWorker(worker.name)
9790

9891
if handler.state.Is(state.Ready) {
@@ -132,7 +125,6 @@ func tearDownWorkerScript(handler *workerThread, exitStatus int) {
132125
// on exit status 0 we just run the worker script again
133126
if exitStatus == 0 && !handler.isBootingScript {
134127
metrics.StopWorker(worker.name, StopReasonRestart)
135-
handler.backoff.RecordSuccess()
136128
logger.LogAttrs(ctx, slog.LevelDebug, "restarting", slog.String("worker", worker.name), slog.Int("thread", handler.thread.threadIndex), slog.Int("exit_status", exitStatus))
137129

138130
return
@@ -148,16 +140,30 @@ func tearDownWorkerScript(handler *workerThread, exitStatus int) {
148140
return
149141
}
150142

151-
logger.LogAttrs(ctx, slog.LevelError, "worker script has not reached frankenphp_handle_request()", slog.String("worker", worker.name), slog.Int("thread", handler.thread.threadIndex))
152-
153-
// panic after exponential backoff if the worker has never reached frankenphp_handle_request
154-
if handler.backoff.RecordFailure() {
155-
if !watcherIsEnabled && !handler.state.Is(state.Ready) {
156-
logger.LogAttrs(ctx, slog.LevelError, "too many consecutive worker failures", slog.String("worker", worker.name), slog.Int("thread", handler.thread.threadIndex), slog.Int("failures", handler.backoff.FailureCount()))
157-
panic("too many consecutive worker failures")
143+
if worker.maxConsecutiveFailures >= 0 && startupFailChan != nil && !watcherIsEnabled && handler.failureCount >= worker.maxConsecutiveFailures {
144+
select {
145+
case startupFailChan <- fmt.Errorf("worker failure: script %s has not reached frankenphp_handle_request()", worker.fileName):
146+
handler.thread.state.Set(state.ShuttingDown)
147+
return
158148
}
159-
logger.LogAttrs(ctx, slog.LevelWarn, "many consecutive worker failures", slog.String("worker", worker.name), slog.Int("thread", handler.thread.threadIndex), slog.Int("failures", handler.backoff.FailureCount()))
160149
}
150+
151+
if watcherIsEnabled {
152+
// worker script has probably failed due to script changes while watcher is enabled
153+
logger.LogAttrs(ctx, slog.LevelWarn, "(watcher enabled) worker script has not reached frankenphp_handle_request()", slog.String("worker", worker.name), slog.Int("thread", handler.thread.threadIndex))
154+
} else {
155+
// rare case where worker script has failed on a restart during normal operation
156+
// this can happen if startup success depends on external resources
157+
logger.LogAttrs(ctx, slog.LevelWarn, "worker script has failed on restart", slog.String("worker", worker.name), slog.Int("thread", handler.thread.threadIndex))
158+
}
159+
160+
// wait a bit and try again (exponential backoff)
161+
backoffDuration := time.Duration(handler.failureCount*handler.failureCount*100) * time.Millisecond
162+
if backoffDuration > time.Second {
163+
backoffDuration = time.Second
164+
}
165+
handler.failureCount++
166+
time.Sleep(backoffDuration)
161167
}
162168

163169
// waitForWorkerRequest is called during frankenphp_handle_request in the php worker script.
@@ -171,6 +177,7 @@ func (handler *workerThread) waitForWorkerRequest() (bool, any) {
171177
// Clear the first dummy request created to initialize the worker
172178
if handler.isBootingScript {
173179
handler.isBootingScript = false
180+
handler.failureCount = 0
174181
if !C.frankenphp_shutdown_dummy_request() {
175182
panic("Not in CGI context")
176183
}

worker.go

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,41 +31,52 @@ type worker struct {
3131
var (
3232
workers []*worker
3333
watcherIsEnabled bool
34+
startupFailChan chan (error)
3435
)
3536

3637
func initWorkers(opt []workerOpt) error {
3738
workers = make([]*worker, 0, len(opt))
38-
workersReady := sync.WaitGroup{}
3939
directoriesToWatch := getDirectoriesToWatch(opt)
4040
watcherIsEnabled = len(directoriesToWatch) > 0
41+
totalThreadsToStart := 0
4142

4243
for _, o := range opt {
4344
w, err := newWorker(o)
4445
if err != nil {
4546
return err
4647
}
48+
totalThreadsToStart += w.num
4749
workers = append(workers, w)
4850
}
4951

52+
startupFailChan = make(chan error, totalThreadsToStart)
53+
var workersReady sync.WaitGroup
5054
for _, w := range workers {
51-
workersReady.Add(w.num)
5255
for i := 0; i < w.num; i++ {
5356
thread := getInactivePHPThread()
5457
convertToWorkerThread(thread, w)
55-
go func() {
56-
thread.state.WaitFor(state.Ready)
57-
workersReady.Done()
58-
}()
58+
workersReady.Go(func() {
59+
thread.state.WaitFor(state.Ready, state.ShuttingDown, state.Done)
60+
})
5961
}
6062
}
6163

6264
workersReady.Wait()
6365

66+
select {
67+
case err := <-startupFailChan:
68+
// at least 1 worker has failed, shut down and return an error
69+
Shutdown()
70+
return fmt.Errorf("failed to initialize workers: %w", err)
71+
default:
72+
// all workers started successfully
73+
startupFailChan = nil
74+
}
75+
6476
if !watcherIsEnabled {
6577
return nil
6678
}
6779

68-
watcherIsEnabled = true
6980
if err := watcher.InitWatcher(directoriesToWatch, RestartWorkers, logger); err != nil {
7081
return err
7182
}

0 commit comments

Comments
 (0)