Skip to content

Commit aaedc23

Browse files
authored
[tmpnet] Watch for and report FATAL log entries on node startup (#3535)
1 parent 5dfe909 commit aaedc23

File tree

1 file changed

+73
-2
lines changed

1 file changed

+73
-2
lines changed

tests/fixture/tmpnet/node_process.go

Lines changed: 73 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
package tmpnet
55

66
import (
7+
"bufio"
78
"context"
89
"encoding/json"
910
"errors"
@@ -98,6 +99,15 @@ func (p *NodeProcess) Start(w io.Writer) error {
9899
return err
99100
}
100101

102+
// Watch the node's main.log file in the background for FATAL log entries that indicate
103+
// a configuration error preventing startup. Such a log entry will be provided to the
104+
// cancelWithCause function so that waitForProcessContext can exit early with an error
105+
// that includes the log entry.
106+
ctx, cancelWithCause := context.WithCancelCause(context.Background())
107+
defer cancelWithCause(nil)
108+
logPath := p.node.GetDataDir() + "/logs/main.log"
109+
go watchLogFileForFatal(ctx, cancelWithCause, w, logPath)
110+
101111
// Determine appropriate level of node description detail
102112
dataDir := p.node.GetDataDir()
103113
nodeDescription := fmt.Sprintf("node %q", p.node.NodeID)
@@ -113,7 +123,7 @@ func (p *NodeProcess) Start(w io.Writer) error {
113123
// A node writes a process context file on start. If the file is not
114124
// found in a reasonable amount of time, the node is unlikely to have
115125
// started successfully.
116-
if err := p.waitForProcessContext(context.Background()); err != nil {
126+
if err := p.waitForProcessContext(ctx); err != nil {
117127
return fmt.Errorf("failed to start local node: %w", err)
118128
}
119129

@@ -199,7 +209,7 @@ func (p *NodeProcess) waitForProcessContext(ctx context.Context) error {
199209

200210
select {
201211
case <-ctx.Done():
202-
return fmt.Errorf("failed to load process context for node %q before timeout: %w", p.node.NodeID, ctx.Err())
212+
return fmt.Errorf("failed to load process context for node %q: %w", p.node.NodeID, context.Cause(ctx))
203213
case <-ticker.C:
204214
}
205215
}
@@ -331,3 +341,64 @@ func (p *NodeProcess) writeMonitoringConfigFile(tmpnetDir string, name string, c
331341

332342
return nil
333343
}
344+
345+
// watchLogFileForFatal waits for the specified file path to exist and then checks each of
// its lines for the string 'FATAL' until such a line is observed or the provided context
// is canceled. If a line containing 'FATAL' is encountered, it will be provided (without
// its trailing line terminator) as an error to the provided cancelWithCause function.
//
// Errors encountered while looking for FATAL log entries are considered potential rather
// than positive indications of failure and are printed to the provided writer instead of
// being provided to the cancelWithCause function.
//
// The function blocks until a FATAL line is found, a read/open error occurs, or ctx is
// canceled, so it is intended to be run in its own goroutine.
func watchLogFileForFatal(ctx context.Context, cancelWithCause context.CancelCauseFunc, w io.Writer, path string) {
	const waitInterval = 100 * time.Millisecond

	// wait pauses for waitInterval and reports whether the pause completed. It returns
	// false as soon as ctx is canceled so that the goroutine exits promptly instead of
	// sleeping through a cancellation.
	wait := func() bool {
		timer := time.NewTimer(waitInterval)
		defer timer.Stop()
		select {
		case <-ctx.Done():
			return false
		case <-timer.C:
			return true
		}
	}

	// Wait for the file to exist. Any stat result other than "does not exist" falls
	// through to os.Open, which will report the underlying problem if there is one.
	for {
		if ctx.Err() != nil {
			return
		}
		if _, err := os.Stat(path); !os.IsNotExist(err) {
			break
		}
		if !wait() {
			return
		}
	}

	// Open the file
	file, err := os.Open(path)
	if err != nil {
		// Best-effort diagnostic: a failure to write the message is not actionable here.
		_, _ = fmt.Fprintf(w, "failed to open %s: %v\n", path, err)
		return
	}
	defer file.Close()

	// Scan for lines in the file containing 'FATAL'
	reader := bufio.NewReader(file)
	// pending accumulates the start of a line that the node had only partially written
	// when end of file was reached. ReadString returns that partial data together with
	// io.EOF, and dropping it could split (and therefore miss) a FATAL entry.
	var pending strings.Builder
	for {
		if ctx.Err() != nil {
			return
		}

		chunk, err := reader.ReadString('\n')
		if err != nil {
			if !errors.Is(err, io.EOF) {
				// Best-effort diagnostic: a failure to write the message is not actionable here.
				_, _ = fmt.Fprintf(w, "error reading %s: %v\n", path, err)
				return
			}
			// End of file reached: keep whatever was read, then wait for more data.
			pending.WriteString(chunk)
			if !wait() {
				return
			}
			continue
		}

		// A complete line is available; prepend any previously buffered partial data.
		line := chunk
		if pending.Len() > 0 {
			pending.WriteString(chunk)
			line = pending.String()
			pending.Reset()
		}

		if strings.Contains(line, "FATAL") {
			// Strip the line terminator so that the cause composes cleanly when wrapped.
			cancelWithCause(errors.New(strings.TrimRight(line, "\r\n")))
			return
		}
	}
}

0 commit comments

Comments
 (0)