4
4
package tmpnet
5
5
6
6
import (
7
+ "bufio"
7
8
"context"
8
9
"encoding/json"
9
10
"errors"
@@ -98,6 +99,15 @@ func (p *NodeProcess) Start(w io.Writer) error {
98
99
return err
99
100
}
100
101
102
+ // Watch the node's main.log file in the background for FATAL log entries that indicate
103
+ // a configuration error preventing startup. Such a log entry will be provided to the
104
+ // cancelWithCause function so that waitForProcessContext can exit early with an error
105
+ // that includes the log entry.
106
+ ctx , cancelWithCause := context .WithCancelCause (context .Background ())
107
+ defer cancelWithCause (nil )
108
+ logPath := p .node .GetDataDir () + "/logs/main.log"
109
+ go watchLogFileForFatal (ctx , cancelWithCause , w , logPath )
110
+
101
111
// Determine appropriate level of node description detail
102
112
dataDir := p .node .GetDataDir ()
103
113
nodeDescription := fmt .Sprintf ("node %q" , p .node .NodeID )
@@ -113,7 +123,7 @@ func (p *NodeProcess) Start(w io.Writer) error {
113
123
// A node writes a process context file on start. If the file is not
114
124
// found in a reasonable amount of time, the node is unlikely to have
115
125
// started successfully.
116
- if err := p .waitForProcessContext (context . Background () ); err != nil {
126
+ if err := p .waitForProcessContext (ctx ); err != nil {
117
127
return fmt .Errorf ("failed to start local node: %w" , err )
118
128
}
119
129
@@ -199,7 +209,7 @@ func (p *NodeProcess) waitForProcessContext(ctx context.Context) error {
199
209
200
210
select {
201
211
case <- ctx .Done ():
202
- return fmt .Errorf ("failed to load process context for node %q before timeout : %w" , p .node .NodeID , ctx . Err ( ))
212
+ return fmt .Errorf ("failed to load process context for node %q: %w" , p .node .NodeID , context . Cause ( ctx ))
203
213
case <- ticker .C :
204
214
}
205
215
}
@@ -331,3 +341,64 @@ func (p *NodeProcess) writeMonitoringConfigFile(tmpnetDir string, name string, c
331
341
332
342
return nil
333
343
}
344
+
345
// watchLogFileForFatal waits for the specified file path to exist and then checks each of
// its lines for the string 'FATAL' until such a line is observed or the provided context
// is canceled. If a line containing 'FATAL' is encountered, it will be provided (without
// its trailing line terminator) as an error to the provided cancelWithCause function.
//
// The file is followed as it grows: on reaching end of file the function waits briefly
// and resumes reading. A line that has only been partially written when end of file is
// reached is buffered and completed by subsequent reads, so that an entry is always
// checked (and reported) in full.
//
// Errors encountered while looking for FATAL log entries are considered potential rather
// than positive indications of failure and are printed to the provided writer instead of
// being provided to the cancelWithCause function.
//
// This function blocks until a FATAL entry is found, an error occurs, or ctx is canceled,
// and is intended to be run in its own goroutine.
func watchLogFileForFatal(ctx context.Context, cancelWithCause context.CancelCauseFunc, w io.Writer, path string) {
	const waitInterval = 100 * time.Millisecond

	// wait pauses for waitInterval and reports whether watching should continue.
	// It returns false as soon as ctx is canceled so that the goroutine does not
	// outlive cancellation by a full polling interval.
	wait := func() bool {
		timer := time.NewTimer(waitInterval)
		defer timer.Stop()
		select {
		case <-ctx.Done():
			return false
		case <-timer.C:
			return true
		}
	}

	// Wait for the file to exist
	for {
		if ctx.Err() != nil {
			return
		}
		if _, err := os.Stat(path); !os.IsNotExist(err) {
			// Either the file exists or stat failed for another reason. In the
			// latter case the attempt to open the file below will report the error.
			break
		}
		// File does not exist yet - wait and try again
		if !wait() {
			return
		}
	}

	// Open the file
	file, err := os.Open(path)
	if err != nil {
		_, _ = fmt.Fprintf(w, "failed to open %s: %v\n", path, err)
		return
	}
	defer file.Close()

	// Scan for lines in the file containing 'FATAL'
	reader := bufio.NewReader(file)
	// partial accumulates the start of a line whose terminating newline has not
	// been written yet. ReadString returns the data read so far along with io.EOF,
	// and that data must be retained or the line would be split across reads.
	var partial string
	for {
		if ctx.Err() != nil {
			return
		}

		chunk, err := reader.ReadString('\n')
		partial += chunk
		if err != nil {
			if errors.Is(err, io.EOF) {
				// If end of file is reached, wait and try again
				if !wait() {
					return
				}
				continue
			}
			_, _ = fmt.Fprintf(w, "error reading %s: %v\n", path, err)
			return
		}

		// A complete line is available
		line := partial
		partial = ""
		if strings.Contains(line, "FATAL") {
			// Strip the line terminator so the resulting error message does not
			// embed a newline when wrapped by callers.
			cancelWithCause(errors.New(strings.TrimRight(line, "\r\n")))
			return
		}
	}
}
0 commit comments